BayesNet 1.0.7
Bayesian Network and basic classifiers library.
XSPODE.cc
1// ***************************************************************
2// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
3// SPDX-FileType: SOURCE
4// SPDX-License-Identifier: MIT
5// ***************************************************************
6#include <algorithm>
7#include <cmath>
8#include <limits>
9#include <numeric>
10#include <sstream>
11#include <stdexcept>
12#include "XSPODE.h"
13#include "bayesnet/utils/TensorUtils.h"
14
15namespace bayesnet {
16
17 // --------------------------------------
18 // Constructor
19 // --------------------------------------
20 XSpode::XSpode(int spIndex)
21 : superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 }, alpha_{ 1.0 },
22 initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() },
23 Classifier(Network())
24 {
25 validHyperparameters = { "parent" };
26 }
27
28 void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
29 {
30 auto hyperparameters = hyperparameters_;
31 if (hyperparameters.contains("parent")) {
32 superParent_ = hyperparameters["parent"];
33 hyperparameters.erase("parent");
34 }
35 Classifier::setHyperparameters(hyperparameters);
36 }
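// A minimal usage sketch (added for illustration, not part of the original
// source): selecting feature 2 as the super-parent through the "parent"
// hyperparameter.
//
//   XSpode spode(0);                                            // default super-parent: feature 0
//   spode.setHyperparameters(nlohmann::json{ {"parent", 2} });  // feature 2 becomes the super-parent
//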
37
38 void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
39 {
40 m = X.size(1);
41 n = X.size(0);
42 dataset = X;
43 buildDataset(y);
44 buildModel(weights_);
45 trainModel(weights_, smoothing);
46 fitted = true;
47 }
48
49 // --------------------------------------
50 // buildModel
51 // --------------------------------------
52 // Initialize the storage needed for the super-parent and child feature
53 // counts and probabilities.
54 // --------------------------------------
55 void XSpode::buildModel(const torch::Tensor& weights)
56 {
57
58 nFeatures_ = n;
59
60 // Derive the number of states for each feature and for the class directly
61 // from the data: each feature's cardinality is taken as the maximum value
62 // observed for it plus one, and the class cardinality is stored separately
63 // in statesClass_. (Alternatively, these could be read from a states map
64 // supplied by the caller.)
65 states_.resize(nFeatures_);
66 for (int f = 0; f < nFeatures_; f++) {
67 // The cardinality of feature f is taken as the maximum value observed
68 // for it in the data, plus one.
69 states_[f] = dataset[f].max().item<int>() + 1;
70 }
71 // For the class: statesClass_ = max(y) + 1
72 statesClass_ = dataset[-1].max().item<int>() + 1;
73
74 // Initialize counts
75 classCounts_.resize(statesClass_, 0.0);
76 // p(x_sp = spVal | c)
77 // We'll store these counts in spFeatureCounts_[spVal * statesClass_ + c].
78 spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
79
80 // For each child ≠ sp, we store p(childVal| c, spVal) in a separate block of
81 // childCounts_. childCounts_ will be sized as sum_{child≠sp} (states_[child]
82 // * statesClass_ * states_[sp]). We also need an offset for each child to
83 // index into childCounts_.
84 childOffsets_.resize(nFeatures_, -1);
85 int totalSize = 0;
86 for (int f = 0; f < nFeatures_; f++) {
87 if (f == superParent_)
88 continue; // skip sp
89 childOffsets_[f] = totalSize;
90 // block size for this child's counts: states_[f] * statesClass_ *
91 // states_[superParent_]
92 totalSize += (states_[f] * statesClass_ * states_[superParent_]);
93 }
94 childCounts_.resize(totalSize, 0.0);
95 }
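// Worked sizing example (illustrative, not part of the original source).
// Assume nFeatures_ = 3 with cardinalities states_ = {2, 3, 2},
// superParent_ = 0 and statesClass_ = 2. Then:
//
//   child f = 1: childOffsets_[1] = 0,  block = 3 * 2 * 2 = 12 entries
//   child f = 2: childOffsets_[2] = 12, block = 2 * 2 * 2 = 8 entries
//   childCounts_.size() = 20, spFeatureCounts_.size() = 2 * 2 = 4
//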
96 // --------------------------------------
97 // trainModel
98 // --------------------------------------
99 //
100 // We only store conditional probabilities for:
101 // p(x_sp| c) (the super-parent feature)
102 // p(x_child| c, x_sp) for all child ≠ sp
103 //
104 // --------------------------------------
105 void XSpode::trainModel(const torch::Tensor& weights,
106 const bayesnet::Smoothing_t smoothing)
107 {
108 // Accumulate raw counts
109 for (int i = 0; i < m; i++) {
110 std::vector<int> instance(nFeatures_ + 1);
111 for (int f = 0; f < nFeatures_; f++) {
112 instance[f] = dataset[f][i].item<int>();
113 }
114 instance[nFeatures_] = dataset[-1][i].item<int>();
115 addSample(instance, weights[i].item<double>());
116 }
117 switch (smoothing) {
118 case bayesnet::Smoothing_t::ORIGINAL:
119 alpha_ = 1.0 / m;
120 break;
121 case bayesnet::Smoothing_t::LAPLACE:
122 alpha_ = 1.0;
123 break;
124 default:
125 alpha_ = 0.0; // No smoothing
126 }
127 initializer_ = std::numeric_limits<double>::max() /
128 (nFeatures_ * nFeatures_); // large factor to prevent underflow when multiplying many probabilities
129 // Convert raw counts to probabilities
130 computeProbabilities();
131 }
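// Smoothing example (illustrative): with m = 100 training instances,
// Smoothing_t::ORIGINAL gives alpha_ = 1.0 / 100 = 0.01, Smoothing_t::LAPLACE
// gives alpha_ = 1.0, and any other value disables smoothing (alpha_ = 0).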
132
133 // --------------------------------------
134 // addSample
135 // --------------------------------------
136 //
137 // instance has size nFeatures_ + 1, with the class at the end.
138 // We add 1 to the appropriate counters for each (c, superParentVal, childVal).
139 //
140 void XSpode::addSample(const std::vector<int>& instance, double weight)
141 {
142 if (weight <= 0.0)
143 return;
144
145 int c = instance.back();
146 // (A) increment classCounts
147 classCounts_[c] += weight;
148
149 // (B) increment super-parent counts => p(x_sp | c)
150 int spVal = instance[superParent_];
151 spFeatureCounts_[spVal * statesClass_ + c] += weight;
152
153 // (C) increment child counts => p(childVal | c, x_sp)
154 for (int f = 0; f < nFeatures_; f++) {
155 if (f == superParent_)
156 continue;
157 int childVal = instance[f];
158 int offset = childOffsets_[f];
159 // Compute index in childCounts_.
160 // Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
161 int blockSize = states_[f] * statesClass_;
162 int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
163 childCounts_[idx] += weight;
164 }
165 }
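// Worked index example (illustrative), reusing the toy cardinalities from the
// sizing example above: states_ = {2, 3, 2}, superParent_ = 0, statesClass_ = 2.
// For the instance {x0 = 1, x1 = 2, x2 = 0, c = 1} with weight w:
//
//   classCounts_[1]            += w
//   spFeatureCounts_[1*2 + 1]  += w   // index 3
//   child f = 1: idx = 0  + 1*(3*2) + 2*2 + 1 = 11
//   child f = 2: idx = 12 + 1*(2*2) + 0*2 + 1 = 17
//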
166
167 // --------------------------------------
168 // computeProbabilities
169 // --------------------------------------
170 //
171 // Once all samples are added in COUNTS mode, call this to:
172 // p(c)
173 // p(x_sp = spVal | c)
174 // p(x_child = v | c, x_sp = s_sp)
175 //
176 // --------------------------------------
177 void XSpode::computeProbabilities()
178 {
179 double totalCount =
180 std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
181
182 // p(c) => classPriors_
183 classPriors_.resize(statesClass_, 0.0);
184 if (totalCount <= 0.0) {
185 // fallback => uniform
186 double unif = 1.0 / static_cast<double>(statesClass_);
187 for (int c = 0; c < statesClass_; c++) {
188 classPriors_[c] = unif;
189 }
190 } else {
191 for (int c = 0; c < statesClass_; c++) {
192 classPriors_[c] =
193 (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
194 }
195 }
196
197 // p(x_sp | c)
198 spFeatureProbs_.resize(spFeatureCounts_.size());
199 // denominator for spVal * statesClass_ + c is just classCounts_[c] + alpha_ *
200 // (#states of sp)
201 int spCard = states_[superParent_];
202 for (int spVal = 0; spVal < spCard; spVal++) {
203 for (int c = 0; c < statesClass_; c++) {
204 double denom = classCounts_[c] + alpha_ * spCard;
205 double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
206 spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
207 }
208 }
209
210 // p(x_child | c, x_sp)
211 childProbs_.resize(childCounts_.size());
212 for (int f = 0; f < nFeatures_; f++) {
213 if (f == superParent_)
214 continue;
215 int offset = childOffsets_[f];
216 int childCard = states_[f];
217
218 // For each spVal, c, childVal in childCounts_:
219 for (int spVal = 0; spVal < spCard; spVal++) {
220 for (int childVal = 0; childVal < childCard; childVal++) {
221 for (int c = 0; c < statesClass_; c++) {
222 int idx = offset + spVal * (childCard * statesClass_) +
223 childVal * statesClass_ + c;
224
225 double num = childCounts_[idx] + alpha_;
226 // denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ *
227 // (#states of child)
228 double denom =
229 spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
230 childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
231 }
232 }
233 }
234 }
235 }
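// Worked example (illustrative): with alpha_ = 1, statesClass_ = 2 and
// classCounts_ = {30, 70} (totalCount = 100):
//
//   p(c = 0) = (30 + 1) / (100 + 1*2) = 31 / 102 ≈ 0.304
//
// and, for a super-parent with spCard = 2 and spFeatureCounts_[1*2 + 0] = 12:
//
//   p(x_sp = 1 | c = 0) = (12 + 1) / (30 + 1*2) = 13 / 32 ≈ 0.406
//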
236
237 // --------------------------------------
238 // predict_proba
239 // --------------------------------------
240 //
241 // For a single instance x of dimension nFeatures_:
242 // P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
243 //
244 // --------------------------------------
245 std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
246 {
247 if (!fitted) {
248 throw std::logic_error(CLASSIFIER_NOT_FITTED);
249 }
250 std::vector<double> probs(statesClass_, 0.0);
251 // Multiply p(c) × p(x_sp | c)
252 int spVal = instance[superParent_];
253 for (int c = 0; c < statesClass_; c++) {
254 double pc = classPriors_[c];
255 double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
256 probs[c] = pc * pSpC * initializer_;
257 }
258
259 // Multiply by each child’s probability p(x_child | c, x_sp)
260 for (int feature = 0; feature < nFeatures_; feature++) {
261 if (feature == superParent_)
262 continue; // skip sp
263 int sf = instance[feature];
264 int offset = childOffsets_[feature];
265 int childCard = states_[feature]; // cardinality of this child feature
266 // Index into childProbs_ = offset + spVal*(childCard*statesClass_) +
267 // childVal*statesClass_ + c
268 int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
269 for (int c = 0; c < statesClass_; c++) {
270 probs[c] *= childProbs_[base + c];
271 }
272 }
273
274 // Normalize
275 normalize(probs);
276 return probs;
277 }
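// Worked example (illustrative): with two classes, classPriors_ = {0.3, 0.7},
// p(x_sp | c) = {0.4, 0.2} and a single child with p(x_child | c, x_sp) =
// {0.5, 0.5}, the unnormalized scores are proportional to {0.06, 0.07}; the
// common factor initializer_ cancels in normalize(), giving ≈ {0.462, 0.538}.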
278 std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
279 {
280 int test_size = test_data[0].size();
281 int sample_size = test_data.size();
282 auto probabilities = std::vector<std::vector<double>>(
283 test_size, std::vector<double>(statesClass_));
284
285 int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
286 std::vector<std::thread> threads;
287 auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
288 int chunk, int sample_size,
289 std::vector<std::vector<double>>& predictions) {
290 std::string threadName =
291 "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
292#if defined(__linux__)
293 pthread_setname_np(pthread_self(), threadName.c_str());
294#else
295 pthread_setname_np(threadName.c_str());
296#endif
297
298 std::vector<int> instance(sample_size);
299 for (int sample = begin; sample < begin + chunk; ++sample) {
300 for (int feature = 0; feature < sample_size; ++feature) {
301 instance[feature] = samples[feature][sample];
302 }
303 predictions[sample] = predict_proba(instance);
304 }
305 semaphore_.release();
306 };
307 for (int begin = 0; begin < test_size; begin += chunk_size) {
308 int chunk = std::min(chunk_size, test_size - begin);
309 semaphore_.acquire();
310 threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
311 }
312 for (auto& thread : threads) {
313 thread.join();
314 }
315 return probabilities;
316 }
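// Chunking example (illustrative): with test_size = 1000 samples and a
// semaphore that allows 8 concurrent threads, chunk_size = min(150, 1000/8 + 1)
// = 126, so the loop launches 8 workers covering 7 chunks of 126 samples plus
// a final chunk of 118.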
317
318 // --------------------------------------
319 // Utility: normalize
320 // --------------------------------------
321 void XSpode::normalize(std::vector<double>& v) const
322 {
323 double sum = 0.0;
324 for (auto val : v) {
325 sum += val;
326 }
327 if (sum <= 0.0) {
328 return;
329 }
330 for (auto& val : v) {
331 val /= sum;
332 }
333 }
334
335 // --------------------------------------
336 // representation of the model
337 // --------------------------------------
338 std::string XSpode::to_string() const
339 {
340 std::ostringstream oss;
341 oss << "----- XSpode Model -----" << std::endl
342 << "nFeatures_ = " << nFeatures_ << std::endl
343 << "superParent_ = " << superParent_ << std::endl
344 << "statesClass_ = " << statesClass_ << std::endl
345 << std::endl;
346
347 oss << "States: [";
348 for (int s : states_)
349 oss << s << " ";
350 oss << "]" << std::endl;
351 oss << "classCounts_: [";
352 for (double c : classCounts_)
353 oss << c << " ";
354 oss << "]" << std::endl;
355 oss << "classPriors_: [";
356 for (double c : classPriors_)
357 oss << c << " ";
358 oss << "]" << std::endl;
359 oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl
360 << "[";
361 for (double c : spFeatureCounts_)
362 oss << c << " ";
363 oss << "]" << std::endl;
364 oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl
365 << "[";
366 for (double c : spFeatureProbs_)
367 oss << c << " ";
368 oss << "]" << std::endl;
369 oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
370 for (double cc : childCounts_)
371 oss << cc << " ";
372 oss << "]" << std::endl;
373 oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
374 for (double cp : childProbs_)
375 oss << cp << " ";
376 oss << "]" << std::endl;
377 oss << "childOffsets_: [";
378 for (int co : childOffsets_)
379 oss << co << " ";
380 oss << "]" << std::endl;
381 oss << std::string(40,'-') << std::endl;
382 return oss.str();
383 }
384 int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
385 int XSpode::getClassNumStates() const { return statesClass_; }
386 int XSpode::getNFeatures() const { return nFeatures_; }
387 int XSpode::getNumberOfStates() const
388 {
389 return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
390 }
391 int XSpode::getNumberOfEdges() const
392 {
393 return 2 * nFeatures_ + 1;
394 }
395
396 // ------------------------------------------------------
397 // Predict overrides (classifier interface)
398 // ------------------------------------------------------
399 int XSpode::predict(const std::vector<int>& instance) const
400 {
401 auto p = predict_proba(instance);
402 return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
403 }
404 std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
405 {
406 auto probabilities = predict_proba(test_data);
407 std::vector<int> predictions(probabilities.size(), 0);
408
409 for (size_t i = 0; i < probabilities.size(); i++) {
410 predictions[i] = std::distance(
411 probabilities[i].begin(),
412 std::max_element(probabilities[i].begin(), probabilities[i].end()));
413 }
414 return predictions;
415 }
416 torch::Tensor XSpode::predict(torch::Tensor& X)
417 {
418 auto X_ = TensorUtils::to_matrix(X);
419 auto result_v = predict(X_);
420 return torch::tensor(result_v, torch::kInt32);
421 }
422 torch::Tensor XSpode::predict_proba(torch::Tensor& X)
423 {
424 auto X_ = TensorUtils::to_matrix(X);
425 auto result_v = predict_proba(X_);
426 int n_samples = X.size(1);
427 torch::Tensor result =
428 torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
429 for (int i = 0; i < result_v.size(); ++i) {
430 result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
431 }
432 return result;
433 }
434 float XSpode::score(torch::Tensor& X, torch::Tensor& y)
435 {
436 torch::Tensor y_pred = predict(X);
437 return (y_pred == y).sum().item<float>() / y.size(0);
438 }
439 float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
440 {
441 auto y_pred = this->predict(X);
442 int correct = 0;
443 for (size_t i = 0; i < y_pred.size(); ++i) {
444 if (y_pred[i] == y[i]) {
445 correct++;
446 }
447 }
448 return static_cast<float>(correct) / y_pred.size();
449 }
450} // namespace bayesnet
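// End-to-end usage sketch (illustrative only; assumes a discretized dataset laid
// out as features x samples, matching the indexing used throughout this file):
//
//   torch::Tensor X = torch::randint(0, 3, {4, 100}).to(torch::kInt32); // 4 features, 100 samples
//   torch::Tensor y = torch::randint(0, 2, {100}).to(torch::kInt32);    // binary class labels
//   torch::Tensor w = torch::ones({100}, torch::kDouble);               // uniform sample weights
//
//   bayesnet::XSpode spode(0);                               // feature 0 as super-parent
//   spode.fitx(X, y, w, bayesnet::Smoothing_t::LAPLACE);     // Laplace smoothing (alpha = 1)
//   float acc = spode.score(X, y);                           // training accuracy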