BayesNet 1.0.7.
Bayesian Network and basic classifiers Library.
XSP2DE.cc
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "XSP2DE.h"
#include <pthread.h> // for pthread_setname_np on Linux
#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {

// --------------------------------------
// Constructor
// --------------------------------------
XSp2de::XSp2de(int spIndex1, int spIndex2)
    : Classifier(Network())
    , superParent1_{ spIndex1 }
    , superParent2_{ spIndex2 }
    , nFeatures_{ 0 }
    , statesClass_{ 0 }
    , alpha_{ 1.0 }
    , initializer_{ 1.0 }
    , semaphore_{ CountingSemaphore::getInstance() }
{
    validHyperparameters = { "parent1", "parent2" };
}

// --------------------------------------
// setHyperparameters
// --------------------------------------
void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("parent1")) {
        superParent1_ = hyperparameters["parent1"];
        hyperparameters.erase("parent1");
    }
    if (hyperparameters.contains("parent2")) {
        superParent2_ = hyperparameters["parent2"];
        hyperparameters.erase("parent2");
    }
    // Hand off anything else to the base Classifier
    Classifier::setHyperparameters(hyperparameters);
}
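
// Hedged usage sketch (illustrative only; assumes the public API shown above):
// the superparents can be set at construction or overridden as hyperparameters.
// @code
//   bayesnet::XSp2de clf(0, 1);
//   clf.setHyperparameters(nlohmann::json{ { "parent1", 2 }, { "parent2", 3 } });
// @endcode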

// --------------------------------------
// fitx
// --------------------------------------
void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y,
                  torch::Tensor & weights_, const Smoothing_t smoothing)
{
    m = X.size(1); // number of samples
    n = X.size(0); // number of features
    dataset = X;

    // Append the class labels as the last row of the dataset
    buildDataset(y);

    // Construct the data structures needed for counting
    buildModel(weights_);

    // Accumulate counts and convert them to probabilities
    trainModel(weights_, smoothing);
    fitted = true;
}
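
// Hedged shape sketch (assumed from the code above: X is features x samples,
// y and weights_ hold one entry per sample):
// @code
//   auto X = torch::randint(0, 3, { n_features, n_samples }, torch::kInt32);
//   auto y = torch::randint(0, 2, { n_samples }, torch::kInt32);
//   auto w = torch::full({ n_samples }, 1.0, torch::kDouble);
//   clf.fitx(X, y, w, bayesnet::Smoothing_t::LAPLACE);
// @endcode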

// --------------------------------------
// buildModel
// --------------------------------------
void XSp2de::buildModel(const torch::Tensor &weights)
{
    nFeatures_ = n;

    // Derive the number of states for each feature from the dataset:
    // states_[f] = max value observed in feature f + 1. This assumes the
    // data are label-encoded from 0 upward; adapt it if yours are not.
    states_.resize(nFeatures_);
    for (int f = 0; f < nFeatures_; f++) {
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    // Class states:
    statesClass_ = dataset[-1].max().item<int>() + 1;

    // Initialize the class counts
    classCounts_.resize(statesClass_, 0.0);

    // For sp1 -> p(sp1Val | c)
    sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);

    // For sp2 -> p(sp2Val | c)
    sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);

    // For each child feature we store p(childVal | c, sp1Val, sp2Val).
    // childCounts_ holds the raw counts in one flat vector, so each
    // feature needs an offset into it.
    childOffsets_.resize(nFeatures_, -1);

    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_) {
            // skip the superparents
            childOffsets_[f] = -1;
            continue;
        }
        childOffsets_[f] = totalSize;
        // Block size for a single child f:
        // states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]
        totalSize += (states_[f] * statesClass_
                      * states_[superParent1_]
                      * states_[superParent2_]);
    }
    childCounts_.resize(totalSize, 0.0);
}
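
namespace {
// Hedged compile-time check with hypothetical cardinalities (not taken from
// any dataset): one child block holds states_[f] * statesClass_ *
// states_[sp1] * states_[sp2] entries, so two children of cardinality 2 with
// statesClass_ = 2 and both superparent cardinalities = 3 need 36 + 36 slots.
constexpr int childBlock(int fCard, int nClasses, int sp1Card, int sp2Card)
{
    return fCard * nClasses * sp1Card * sp2Card;
}
static_assert(childBlock(2, 2, 3, 3) + childBlock(2, 2, 3, 3) == 72,
              "two child blocks of 36 entries each");
} // namespace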

// --------------------------------------
// trainModel
// --------------------------------------
void XSp2de::trainModel(const torch::Tensor &weights,
                        const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw counts
    for (int i = 0; i < m; i++) {
        std::vector<int> instance(nFeatures_ + 1);
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>(); // class
        double w = weights[i].item<double>();
        addSample(instance, w);
    }

    // Choose alpha based on the smoothing strategy:
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // no smoothing
    }

    // Large scaling factor applied in predict_proba so that the running
    // product of many small probabilities does not underflow to zero.
    initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);

    // Convert raw counts to probabilities
    computeProbabilities();
}
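
// Hedged numeric check of the smoothing choices above (example value): with
// m = 4 samples, ORIGINAL gives alpha = 1/m = 0.25, while LAPLACE is always 1.
static_assert(1.0 / 4 == 0.25, "ORIGINAL smoothing with m = 4");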

// --------------------------------------
// addSample
// --------------------------------------
void XSp2de::addSample(const std::vector<int> &instance, double weight)
{
    if (weight <= 0.0)
        return;

    int c = instance.back();
    // increment the class counts
    classCounts_[c] += weight;

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // p(sp1 | c)
    sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;

    // p(sp2 | c)
    sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;

    // p(childVal | c, sp1Val, sp2Val)
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int childVal = instance[f];
        int offset = childOffsets_[f];
        // Block layout:
        //   offset + sp1Val * (states_[superParent2_] * states_[f] * statesClass_)
        //          + sp2Val * (states_[f] * statesClass_)
        //          + childVal * statesClass_
        //          + c
        int blockSizeSp2 = states_[superParent2_]
                           * states_[f]
                           * statesClass_;
        int blockSizeChild = states_[f] * statesClass_;

        int idx = offset
                  + sp1Val * blockSizeSp2
                  + sp2Val * blockSizeChild
                  + childVal * statesClass_
                  + c;
        childCounts_[idx] += weight;
    }
}
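
namespace {
// Hedged worked example of the flat index computed in addSample, with
// hypothetical cardinalities sp2Card = 2, fCard = 3, statesClass_ = 2:
constexpr int exampleIdx(int offset, int sp1Val, int sp2Val, int childVal,
                         int c, int sp2Card, int fCard, int nClasses)
{
    return offset + sp1Val * (sp2Card * fCard * nClasses)
           + sp2Val * (fCard * nClasses) + childVal * nClasses + c;
}
// (sp1Val = 1, sp2Val = 0, childVal = 2, c = 1) -> 12 + 0 + 4 + 1 = 17
static_assert(exampleIdx(0, 1, 0, 2, 1, 2, 3, 2) == 17, "flat index layout");
} // namespace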

// --------------------------------------
// computeProbabilities
// --------------------------------------
void XSp2de::computeProbabilities()
{
    double totalCount = std::accumulate(classCounts_.begin(),
                                        classCounts_.end(), 0.0);

    // classPriors_
    classPriors_.resize(statesClass_, 0.0);
    if (totalCount <= 0.0) {
        // fallback => uniform
        double unif = 1.0 / static_cast<double>(statesClass_);
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] = unif;
        }
    } else {
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] =
                (classCounts_[c] + alpha_)
                / (totalCount + alpha_ * statesClass_);
        }
    }

    // p(sp1Val | c)
    sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
    int sp1Card = states_[superParent1_];
    for (int spVal = 0; spVal < sp1Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp1Card;
            double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp1FeatureProbs_[spVal * statesClass_ + c] =
                (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(sp2Val | c)
    sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
    int sp2Card = states_[superParent2_];
    for (int spVal = 0; spVal < sp2Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp2Card;
            double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp2FeatureProbs_[spVal * statesClass_ + c] =
                (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(childVal | c, sp1Val, sp2Val)
    childProbs_.resize(childCounts_.size());
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int fCard = states_[f];
        int sp1Card_ = states_[superParent1_];
        int sp2Card_ = states_[superParent2_];
        int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
        int childBlockSizeF = fCard * statesClass_;

        int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
        for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
            for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
                // The denominator of p(childVal | c, sp1Val, sp2Val) is the
                // count of (sp1Val, sp2Val, c) regardless of childVal, so we
                // sum the childVal dimension once per class state.
                std::vector<double> sumSp1Sp2C(statesClass_, 0.0);
                for (int cv = 0; cv < fCard; cv++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset
                                  + sp1Val * childBlockSizeSp2
                                  + sp2Val * childBlockSizeF
                                  + cv * statesClass_ + c;
                        sumSp1Sp2C[c] += childCounts_[idx];
                    }
                }
                for (int childVal = 0; childVal < fCard; childVal++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset
                                  + sp1Val * childBlockSizeSp2
                                  + sp2Val * childBlockSizeF
                                  + childVal * statesClass_
                                  + c;
                        double num = childCounts_[idx] + alpha_;
                        double denom = sumSp1Sp2C[c] + alpha_ * fCard;
                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                    }
                }
            }
        }
        offset += blockSize;
    }
}
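
namespace {
// Hedged numeric check of the smoothed estimate used above (example values):
// p = (count + alpha) / (parentCount + alpha * cardinality).
constexpr double smoothed(double count, double parent, double alpha, int card)
{
    return (count + alpha) / (parent + alpha * card);
}
// 3 of 10 class-c samples, alpha = 1, binary feature: (3 + 1) / (10 + 2).
static_assert(smoothed(3, 10, 1, 2) == 4.0 / 12.0, "Laplace-smoothed estimate");
} // namespace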

// --------------------------------------
// predict_proba (single instance)
// --------------------------------------
std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    std::vector<double> probs(statesClass_, 0.0);

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // Start with p(c) * p(sp1Val | c) * p(sp2Val | c)
    for (int c = 0; c < statesClass_; c++) {
        double pC = classPriors_[c];
        double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
        double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
        probs[c] = pC * pSp1C * pSp2C * initializer_;
    }

    // Multiply by each child feature f
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int valF = instance[f];
        int fCard = states_[f];
        int sp1Card = states_[superParent1_];
        int sp2Card = states_[superParent2_];
        int blockSizeSp2 = sp2Card * fCard * statesClass_;
        int blockSizeF = fCard * statesClass_;

        // base index into childProbs_ for this child and (sp1Val, sp2Val)
        int base = offset
                   + sp1Val * blockSizeSp2
                   + sp2Val * blockSizeF
                   + valF * statesClass_;
        for (int c = 0; c < statesClass_; c++) {
            probs[c] *= childProbs_[base + c];
        }
        offset += (fCard * sp1Card * sp2Card * statesClass_);
    }

    // Normalize
    normalize(probs);
    return probs;
}
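
// Note (hedged): initializer_ scales the running product so that the many
// small factors multiplied above do not underflow to zero; normalize()
// divides the scale back out, so the returned posteriors do not depend on it.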

// --------------------------------------
// predict_proba (batch)
// --------------------------------------
std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
{
    int test_size = test_data[0].size(); // each feature is test_data[f], size = #samples
    int sample_size = test_data.size();  // = nFeatures_
    std::vector<std::vector<double>> probabilities(
        test_size, std::vector<double>(statesClass_, 0.0));

    // Same concurrency approach as the other classifiers: bounded chunks,
    // one worker thread per chunk, throttled by the counting semaphore.
    int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
    std::vector<std::thread> threads;

    auto worker = [&](const std::vector<std::vector<int>> &samples,
                      int begin,
                      int chunk,
                      int sample_size,
                      std::vector<std::vector<double>> &predictions) {
        std::string threadName =
            "XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
        pthread_setname_np(pthread_self(), threadName.c_str());
#else
        pthread_setname_np(threadName.c_str());
#endif

        std::vector<int> instance(sample_size);
        for (int sample = begin; sample < begin + chunk; ++sample) {
            for (int feature = 0; feature < sample_size; ++feature) {
                instance[feature] = samples[feature][sample];
            }
            predictions[sample] = predict_proba(instance);
        }
        semaphore_.release();
    };

    for (int begin = 0; begin < test_size; begin += chunk_size) {
        int chunk = std::min(chunk_size, test_size - begin);
        semaphore_.acquire();
        // std::cref avoids copying the whole test set into every thread;
        // the threads are joined below, so the reference stays valid.
        threads.emplace_back(worker, std::cref(test_data), begin, chunk,
                             sample_size, std::ref(probabilities));
    }
    for (auto &th : threads) {
        th.join();
    }
    return probabilities;
}
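
// Hedged numeric check of the chunking rule above (example sizes): with 1000
// test samples and a semaphore that admits 8 threads, chunks are capped at
// min(150, 1000 / 8 + 1) = 126 samples.
static_assert(std::min(150, 1000 / 8 + 1) == 126, "chunk size example");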

// --------------------------------------
// predict (single instance)
// --------------------------------------
int XSp2de::predict(const std::vector<int> &instance) const
{
    auto p = predict_proba(instance);
    return static_cast<int>(
        std::distance(p.begin(), std::max_element(p.begin(), p.end()))
    );
}

// --------------------------------------
// predict (batch of data)
// --------------------------------------
std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
{
    auto probabilities = predict_proba(test_data);
    std::vector<int> predictions(probabilities.size(), 0);

    for (size_t i = 0; i < probabilities.size(); i++) {
        predictions[i] = static_cast<int>(
            std::distance(probabilities[i].begin(),
                          std::max_element(probabilities[i].begin(),
                                           probabilities[i].end()))
        );
    }
    return predictions;
}

// --------------------------------------
// predict (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict(X_);
    return torch::tensor(result_v, torch::kInt32);
}

// --------------------------------------
// predict_proba (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict_proba(X_);
    int n_samples = X.size(1);
    torch::Tensor result =
        torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    for (int i = 0; i < (int)result_v.size(); ++i) {
        result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
    }
    return result;
}

// --------------------------------------
// score (torch::Tensor version)
// --------------------------------------
float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
{
    torch::Tensor y_pred = predict(X);
    return (y_pred == y).sum().item<float>() / y.size(0);
}

// --------------------------------------
// score (vector version)
// --------------------------------------
float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
{
    auto y_pred = predict(X);
    int correct = 0;
    for (size_t i = 0; i < y_pred.size(); ++i) {
        if (y_pred[i] == y[i]) {
            correct++;
        }
    }
    return static_cast<float>(correct) / static_cast<float>(y_pred.size());
}

// --------------------------------------
// Utility: normalize
// --------------------------------------
void XSp2de::normalize(std::vector<double> &v) const
{
    double sum = 0.0;
    for (auto &val : v) {
        sum += val;
    }
    if (sum > 0.0) {
        for (auto &val : v) {
            val /= sum;
        }
    }
}
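
// Hedged worked example: normalize turns {1.0, 1.0, 2.0} into {0.25, 0.25, 0.5};
// an all-zero vector is left untouched to avoid dividing by zero.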

// --------------------------------------
// to_string
// --------------------------------------
std::string XSp2de::to_string() const
{
    std::ostringstream oss;
    oss << "----- XSp2de Model -----\n"
        << "nFeatures_ = " << nFeatures_ << "\n"
        << "superParent1_ = " << superParent1_ << "\n"
        << "superParent2_ = " << superParent2_ << "\n"
        << "statesClass_ = " << statesClass_ << "\n\n";

    oss << "States: [";
    for (auto s : states_) oss << s << " ";
    oss << "]\n";

    oss << "classCounts_:\n";
    for (auto v : classCounts_) oss << v << " ";
    oss << "\nclassPriors_:\n";
    for (auto v : classPriors_) oss << v << " ";
    oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
    for (auto v : sp1FeatureCounts_) oss << v << " ";
    oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
    for (auto v : sp2FeatureCounts_) oss << v << " ";
    oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
    for (auto v : childCounts_) oss << v << " ";

    oss << "\nchildOffsets_:\n";
    for (auto c : childOffsets_) oss << c << " ";

    oss << "\n----------------------------------------\n";
    return oss.str();
}

// --------------------------------------
// Some introspection about the graph
// --------------------------------------
int XSp2de::getNumberOfNodes() const
{
    // nFeatures_ feature nodes + 1 class node
    return nFeatures_ + 1;
}

int XSp2de::getClassNumStates() const
{
    return statesClass_;
}

int XSp2de::getNFeatures() const
{
    return nFeatures_;
}

int XSp2de::getNumberOfStates() const
{
    // Heuristic: the summed feature cardinalities scaled by the number of
    // features. Adjust this if a strict state count is needed.
    return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
}

int XSp2de::getNumberOfEdges() const
{
    // In an SPnDE with n = 2 superparents the edges are:
    //   class -> sp1, class -> sp2    => 2
    //   class -> child                => nFeatures_ - 2
    //   sp1 -> child, sp2 -> child    => 2 * (nFeatures_ - 2)
    // total = 2 + 3 * (nFeatures_ - 2) = 3 * nFeatures_ - 4
    return 3 * nFeatures_ - 4;
}
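
namespace {
// Hedged check of the edge-count identity above: 2 + 3*(n - 2) == 3n - 4.
constexpr int edges(int n) { return 2 + 3 * (n - 2); }
static_assert(edges(5) == 3 * 5 - 4, "edge count for n = 5 features");
} // namespace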

} // namespace bayesnet