BayesNet 1.0.7
Bayesian Network and basic classifiers library.
XSPODE.cc
1// ***************************************************************
2// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
3// SPDX-FileType: SOURCE
4// SPDX-License-Identifier: MIT
5// ***************************************************************
6#include <algorithm>
7#include <cmath>
8#include <limits>
9#include <numeric>
10#include <sstream>
11#include <stdexcept>
12#include "XSPODE.h"
13#include "bayesnet/utils/TensorUtils.h"
14
15namespace bayesnet {
16
17 // --------------------------------------
18 // Constructor
19 // --------------------------------------
20 XSpode::XSpode(int spIndex)
21 : superParent_{ spIndex }, nFeatures_{ 0 }, statesClass_{ 0 }, alpha_{ 1.0 },
22 initializer_{ 1.0 }, semaphore_{ CountingSemaphore::getInstance() },
23 Classifier(Network())
24 {
25 validHyperparameters = { "parent" };
26 }
27
28 void XSpode::setHyperparameters(const nlohmann::json& hyperparameters_)
29 {
30 auto hyperparameters = hyperparameters_;
31 if (hyperparameters.contains("parent")) {
32 superParent_ = hyperparameters["parent"];
33 hyperparameters.erase("parent");
34 }
35 Classifier::setHyperparameters(hyperparameters);
36 }
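// A minimal usage sketch (added for illustration, not part of the original
// source): selecting feature 2 as the super-parent through the "parent"
// hyperparameter.
//
//   XSpode spode(0);                                            // default super-parent: feature 0
//   spode.setHyperparameters(nlohmann::json{ {"parent", 2} });  // feature 2 becomes the super-parent
//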
37
38 void XSpode::fitx(torch::Tensor & X, torch::Tensor& y, torch::Tensor& weights_, const Smoothing_t smoothing)
39 {
40 m = X.size(1);
41 n = X.size(0);
42 dataset = X;
43 buildDataset(y);
44 buildModel(weights_);
45 trainModel(weights_, smoothing);
46 fitted = true;
47 }
48
49 // --------------------------------------
50 // buildModel
51 // --------------------------------------
52 // Initialize the storage needed for the super-parent and child feature
53 // counts and probabilities.
54 // --------------------------------------
55 void XSpode::buildModel(const torch::Tensor& weights)
56 {
57
58 nFeatures_ = n;
59
60 // Derive the number of states for each feature and for the class directly
61 // from the data: each feature's cardinality is taken as the maximum value
62 // observed for it plus one, and the class cardinality is stored separately
63 // in statesClass_. (Alternatively, these could be read from a states map
64 // supplied by the caller.)
65 states_.resize(nFeatures_);
66 for (int f = 0; f < nFeatures_; f++) {
67 // The cardinality of feature f is taken as the maximum value observed
68 // for it in the data, plus one.
69 states_[f] = dataset[f].max().item<int>() + 1;
70 }
71 // For the class: statesClass_ = max(y) + 1
72 statesClass_ = dataset[-1].max().item<int>() + 1;
73
74 // Initialize counts
75 classCounts_.resize(statesClass_, 0.0);
76 // p(x_sp = spVal | c)
77 // We'll store these counts in spFeatureCounts_[spVal * statesClass_ + c].
78 spFeatureCounts_.resize(states_[superParent_] * statesClass_, 0.0);
79
80 // For each child ≠ sp, we store p(childVal| c, spVal) in a separate block of
81 // childCounts_. childCounts_ will be sized as sum_{child≠sp} (states_[child]
82 // * statesClass_ * states_[sp]). We also need an offset for each child to
83 // index into childCounts_.
84 childOffsets_.resize(nFeatures_, -1);
85 int totalSize = 0;
86 for (int f = 0; f < nFeatures_; f++) {
87 if (f == superParent_)
88 continue; // skip sp
89 childOffsets_[f] = totalSize;
90 // block size for this child's counts: states_[f] * statesClass_ *
91 // states_[superParent_]
92 totalSize += (states_[f] * statesClass_ * states_[superParent_]);
93 }
94 childCounts_.resize(totalSize, 0.0);
95 }
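// Worked sizing example (illustrative, not part of the original source).
// Assume nFeatures_ = 3 with cardinalities states_ = {2, 3, 2},
// superParent_ = 0 and statesClass_ = 2. Then:
//
//   child f = 1: childOffsets_[1] = 0,  block = 3 * 2 * 2 = 12 entries
//   child f = 2: childOffsets_[2] = 12, block = 2 * 2 * 2 = 8 entries
//   childCounts_.size() = 20, spFeatureCounts_.size() = 2 * 2 = 4
//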
96 // --------------------------------------
97 // trainModel
98 // --------------------------------------
99 //
100 // We only store conditional probabilities for:
101 // p(x_sp| c) (the super-parent feature)
102 // p(x_child| c, x_sp) for all child ≠ sp
103 //
104 // --------------------------------------
105 void XSpode::trainModel(const torch::Tensor& weights,
106 const bayesnet::Smoothing_t smoothing)
107 {
108 // Accumulate raw counts
109 for (int i = 0; i < m; i++) {
110 std::vector<int> instance(nFeatures_ + 1);
111 for (int f = 0; f < nFeatures_; f++) {
112 instance[f] = dataset[f][i].item<int>();
113 }
114 instance[nFeatures_] = dataset[-1][i].item<int>();
115 addSample(instance, weights[i].item<double>());
116 }
117 switch (smoothing) {
118 case bayesnet::Smoothing_t::ORIGINAL:
119 alpha_ = 1.0 / m;
120 break;
121 case bayesnet::Smoothing_t::LAPLACE:
122 alpha_ = 1.0;
123 break;
124 default:
125 alpha_ = 0.0; // No smoothing
126 }
127 initializer_ = std::numeric_limits<double>::max() /
128 (nFeatures_ * nFeatures_); // large factor to prevent underflow when multiplying many probabilities
129 // Convert raw counts to probabilities
130 computeProbabilities();
131 }
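// Smoothing example (illustrative): with m = 100 training instances,
// Smoothing_t::ORIGINAL gives alpha_ = 1.0 / 100 = 0.01, Smoothing_t::LAPLACE
// gives alpha_ = 1.0, and any other value disables smoothing (alpha_ = 0).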
132
133 // --------------------------------------
134 // addSample
135 // --------------------------------------
136 //
137 // instance has size nFeatures_ + 1, with the class at the end.
138 // We add 1 to the appropriate counters for each (c, superParentVal, childVal).
139 //
140 void XSpode::addSample(const std::vector<int>& instance, double weight)
141 {
142 if (weight <= 0.0)
143 return;
144
145 int c = instance.back();
146 // (A) increment classCounts
147 classCounts_[c] += weight;
148
149 // (B) increment super-parent counts => p(x_sp | c)
150 int spVal = instance[superParent_];
151 spFeatureCounts_[spVal * statesClass_ + c] += weight;
152
153 // (C) increment child counts => p(childVal | c, x_sp)
154 for (int f = 0; f < nFeatures_; f++) {
155 if (f == superParent_)
156 continue;
157 int childVal = instance[f];
158 int offset = childOffsets_[f];
159 // Compute index in childCounts_.
160 // Layout: [ offset + (spVal * states_[f] + childVal) * statesClass_ + c ]
161 int blockSize = states_[f] * statesClass_;
162 int idx = offset + spVal * blockSize + childVal * statesClass_ + c;
163 childCounts_[idx] += weight;
164 }
165 }
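// Worked index example (illustrative), reusing the toy cardinalities from the
// sizing example above: states_ = {2, 3, 2}, superParent_ = 0, statesClass_ = 2.
// For the instance {x0 = 1, x1 = 2, x2 = 0, c = 1} with weight w:
//
//   classCounts_[1]            += w
//   spFeatureCounts_[1*2 + 1]  += w   // index 3
//   child f = 1: idx = 0  + 1*(3*2) + 2*2 + 1 = 11
//   child f = 2: idx = 12 + 1*(2*2) + 0*2 + 1 = 17
//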
166
167 // --------------------------------------
168 // computeProbabilities
169 // --------------------------------------
170 //
171 // Once all samples are added in COUNTS mode, call this to:
172 // p(c)
173 // p(x_sp = spVal | c)
174 // p(x_child = v | c, x_sp = s_sp)
175 //
176 // --------------------------------------
177 void XSpode::computeProbabilities()
178 {
179 double totalCount =
180 std::accumulate(classCounts_.begin(), classCounts_.end(), 0.0);
181
182 // p(c) => classPriors_
183 classPriors_.resize(statesClass_, 0.0);
184 if (totalCount <= 0.0) {
185 // fallback => uniform
186 double unif = 1.0 / static_cast<double>(statesClass_);
187 for (int c = 0; c < statesClass_; c++) {
188 classPriors_[c] = unif;
189 }
190 } else {
191 for (int c = 0; c < statesClass_; c++) {
192 classPriors_[c] =
193 (classCounts_[c] + alpha_) / (totalCount + alpha_ * statesClass_);
194 }
195 }
196
197 // p(x_sp | c)
198 spFeatureProbs_.resize(spFeatureCounts_.size());
199 // denominator for spVal * statesClass_ + c is just classCounts_[c] + alpha_ *
200 // (#states of sp)
201 int spCard = states_[superParent_];
202 for (int spVal = 0; spVal < spCard; spVal++) {
203 for (int c = 0; c < statesClass_; c++) {
204 double denom = classCounts_[c] + alpha_ * spCard;
205 double num = spFeatureCounts_[spVal * statesClass_ + c] + alpha_;
206 spFeatureProbs_[spVal * statesClass_ + c] = (denom <= 0.0 ? 0.0 : num / denom);
207 }
208 }
209
210 // p(x_child | c, x_sp)
211 childProbs_.resize(childCounts_.size());
212 for (int f = 0; f < nFeatures_; f++) {
213 if (f == superParent_)
214 continue;
215 int offset = childOffsets_[f];
216 int childCard = states_[f];
217
218 // For each spVal, c, childVal in childCounts_:
219 for (int spVal = 0; spVal < spCard; spVal++) {
220 for (int childVal = 0; childVal < childCard; childVal++) {
221 for (int c = 0; c < statesClass_; c++) {
222 int idx = offset + spVal * (childCard * statesClass_) +
223 childVal * statesClass_ + c;
224
225 double num = childCounts_[idx] + alpha_;
226 // denominator = spFeatureCounts_[spVal * statesClass_ + c] + alpha_ *
227 // (#states of child)
228 double denom =
229 spFeatureCounts_[spVal * statesClass_ + c] + alpha_ * childCard;
230 childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
231 }
232 }
233 }
234 }
235 }
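// Worked example (illustrative): with alpha_ = 1, statesClass_ = 2 and
// classCounts_ = {30, 70} (totalCount = 100):
//
//   p(c = 0) = (30 + 1) / (100 + 1*2) = 31 / 102 ≈ 0.304
//
// and, for a super-parent with spCard = 2 and spFeatureCounts_[1*2 + 0] = 12:
//
//   p(x_sp = 1 | c = 0) = (12 + 1) / (30 + 1*2) = 13 / 32 ≈ 0.406
//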
236
237 // --------------------------------------
238 // predict_proba
239 // --------------------------------------
240 //
241 // For a single instance x of dimension nFeatures_:
242 // P(c | x) ∝ p(c) × p(x_sp | c) × ∏(child ≠ sp) p(x_child | c, x_sp).
243 //
244 // --------------------------------------
245 std::vector<double> XSpode::predict_proba(const std::vector<int>& instance) const
246 {
247 if (!fitted) {
248 throw std::logic_error(CLASSIFIER_NOT_FITTED);
249 }
250 std::vector<double> probs(statesClass_, 0.0);
251 // Multiply p(c) × p(x_sp | c)
252 int spVal = instance[superParent_];
253 for (int c = 0; c < statesClass_; c++) {
254 double pc = classPriors_[c];
255 double pSpC = spFeatureProbs_[spVal * statesClass_ + c];
256 probs[c] = pc * pSpC * initializer_;
257 }
258
259 // Multiply by each child’s probability p(x_child | c, x_sp)
260 for (int feature = 0; feature < nFeatures_; feature++) {
261 if (feature == superParent_)
262 continue; // skip sp
263 int sf = instance[feature];
264 int offset = childOffsets_[feature];
265 int childCard = states_[feature]; // cardinality of this child feature
266 // Index into childProbs_ = offset + spVal*(childCard*statesClass_) +
267 // childVal*statesClass_ + c
268 int base = offset + spVal * (childCard * statesClass_) + sf * statesClass_;
269 for (int c = 0; c < statesClass_; c++) {
270 probs[c] *= childProbs_[base + c];
271 }
272 }
273
274 // Normalize
275 normalize(probs);
276 return probs;
277 }
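// Worked example (illustrative): with two classes, classPriors_ = {0.3, 0.7},
// p(x_sp | c) = {0.4, 0.2} and a single child with p(x_child | c, x_sp) =
// {0.5, 0.5}, the unnormalized scores are proportional to {0.06, 0.07}; the
// common factor initializer_ cancels in normalize(), giving ≈ {0.462, 0.538}.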
278 std::vector<std::vector<double>> XSpode::predict_proba(std::vector<std::vector<int>>& test_data)
279 {
280 int test_size = test_data[0].size();
281 int sample_size = test_data.size();
282 auto probabilities = std::vector<std::vector<double>>(
283 test_size, std::vector<double>(statesClass_));
284
285 int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
286 std::vector<std::thread> threads;
287 auto worker = [&](const std::vector<std::vector<int>>& samples, int begin,
288 int chunk, int sample_size,
289 std::vector<std::vector<double>>& predictions) {
290 std::string threadName =
291 "(V)PWorker-" + std::to_string(begin) + "-" + std::to_string(chunk);
292#if defined(__linux__)
293 pthread_setname_np(pthread_self(), threadName.c_str());
294#else
295 pthread_setname_np(threadName.c_str());
296#endif
297
298 std::vector<int> instance(sample_size);
299 for (int sample = begin; sample < begin + chunk; ++sample) {
300 for (int feature = 0; feature < sample_size; ++feature) {
301 instance[feature] = samples[feature][sample];
302 }
303 predictions[sample] = predict_proba(instance);
304 }
305 semaphore_.release();
306 };
307 for (int begin = 0; begin < test_size; begin += chunk_size) {
308 int chunk = std::min(chunk_size, test_size - begin);
309 semaphore_.acquire();
310 threads.emplace_back(worker, test_data, begin, chunk, sample_size, std::ref(probabilities));
311 }
312 for (auto& thread : threads) {
313 thread.join();
314 }
315 return probabilities;
316 }
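// Chunking example (illustrative): with test_size = 1000 samples and a
// semaphore that allows 8 concurrent threads, chunk_size = min(150, 1000/8 + 1)
// = 126, so the loop launches 8 workers covering 7 chunks of 126 samples plus
// a final chunk of 118.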
317
318 // --------------------------------------
319 // Utility: normalize
320 // --------------------------------------
321 void XSpode::normalize(std::vector<double>& v) const
322 {
323 double sum = 0.0;
324 for (auto val : v) {
325 sum += val;
326 }
327 if (sum <= 0.0) {
328 return;
329 }
330 for (auto& val : v) {
331 val /= sum;
332 }
333 }
334
335 // --------------------------------------
336 // representation of the model
337 // --------------------------------------
338 std::string XSpode::to_string() const
339 {
340 std::ostringstream oss;
341 oss << "----- XSpode Model -----" << std::endl
342 << "nFeatures_ = " << nFeatures_ << std::endl
343 << "superParent_ = " << superParent_ << std::endl
344 << "statesClass_ = " << statesClass_ << std::endl
345 << std::endl;
346
347 oss << "States: [";
348 for (int s : states_)
349 oss << s << " ";
350 oss << "]" << std::endl;
351 oss << "classCounts_: [";
352 for (double c : classCounts_)
353 oss << c << " ";
354 oss << "]" << std::endl;
355 oss << "classPriors_: [";
356 for (double c : classPriors_)
357 oss << c << " ";
358 oss << "]" << std::endl;
359 oss << "spFeatureCounts_: size = " << spFeatureCounts_.size() << std::endl
360 << "[";
361 for (double c : spFeatureCounts_)
362 oss << c << " ";
363 oss << "]" << std::endl;
364 oss << "spFeatureProbs_: size = " << spFeatureProbs_.size() << std::endl
365 << "[";
366 for (double c : spFeatureProbs_)
367 oss << c << " ";
368 oss << "]" << std::endl;
369 oss << "childCounts_: size = " << childCounts_.size() << std::endl << "[";
370 for (double cc : childCounts_)
371 oss << cc << " ";
372 oss << "]" << std::endl;
373 oss << "childProbs_: size = " << childProbs_.size() << std::endl << "[";
374 for (double cp : childProbs_)
375 oss << cp << " ";
376 oss << "]" << std::endl;
377 oss << "childOffsets_: [";
378 for (int co : childOffsets_)
379 oss << co << " ";
380 oss << "]" << std::endl;
381 oss << std::string(40,'-') << std::endl;
382 return oss.str();
383 }
384 int XSpode::getNumberOfNodes() const { return nFeatures_ + 1; }
385 int XSpode::getClassNumStates() const { return statesClass_; }
386 int XSpode::getNFeatures() const { return nFeatures_; }
387 int XSpode::getNumberOfStates() const
388 {
389 return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
390 }
391 int XSpode::getNumberOfEdges() const
392 {
393 return 2 * nFeatures_ + 1;
394 }
395
396 // ------------------------------------------------------
397 // Predict overrides (classifier interface)
398 // ------------------------------------------------------
399 int XSpode::predict(const std::vector<int>& instance) const
400 {
401 auto p = predict_proba(instance);
402 return static_cast<int>(std::distance(p.begin(), std::max_element(p.begin(), p.end())));
403 }
404 std::vector<int> XSpode::predict(std::vector<std::vector<int>>& test_data)
405 {
406 auto probabilities = predict_proba(test_data);
407 std::vector<int> predictions(probabilities.size(), 0);
408
409 for (size_t i = 0; i < probabilities.size(); i++) {
410 predictions[i] = std::distance(
411 probabilities[i].begin(),
412 std::max_element(probabilities[i].begin(), probabilities[i].end()));
413 }
414 return predictions;
415 }
416 torch::Tensor XSpode::predict(torch::Tensor& X)
417 {
418 auto X_ = TensorUtils::to_matrix(X);
419 auto result_v = predict(X_);
420 return torch::tensor(result_v, torch::kInt32);
421 }
422 torch::Tensor XSpode::predict_proba(torch::Tensor& X)
423 {
424 auto X_ = TensorUtils::to_matrix(X);
425 auto result_v = predict_proba(X_);
426 int n_samples = X.size(1);
427 torch::Tensor result =
428 torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
429 for (int i = 0; i < result_v.size(); ++i) {
430 result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
431 }
432 return result;
433 }
434 float XSpode::score(torch::Tensor& X, torch::Tensor& y)
435 {
436 torch::Tensor y_pred = predict(X);
437 return (y_pred == y).sum().item<float>() / y.size(0);
438 }
439 float XSpode::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
440 {
441 auto y_pred = this->predict(X);
442 int correct = 0;
443 for (size_t i = 0; i < y_pred.size(); ++i) {
444 if (y_pred[i] == y[i]) {
445 correct++;
446 }
447 }
448 return static_cast<float>(correct) / y_pred.size();
449 }
450} // namespace bayesnet
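// End-to-end usage sketch (illustrative only; assumes a discretized dataset laid
// out as features x samples, matching the indexing used throughout this file):
//
//   torch::Tensor X = torch::randint(0, 3, {4, 100}).to(torch::kInt32); // 4 features, 100 samples
//   torch::Tensor y = torch::randint(0, 2, {100}).to(torch::kInt32);    // binary class labels
//   torch::Tensor w = torch::ones({100}, torch::kDouble);               // uniform sample weights
//
//   bayesnet::XSpode spode(0);                               // feature 0 as super-parent
//   spode.fitx(X, y, w, bayesnet::Smoothing_t::LAPLACE);     // Laplace smoothing (alpha = 1)
//   float acc = spode.score(X, y);                           // training accuracy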