BayesNet 1.0.7.
Bayesian Network and basic classifiers Library.
XSP2DE.cc
// ***************************************************************
// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: MIT
// ***************************************************************

#include "XSP2DE.h"
#include <pthread.h> // for pthread_setname_np on Linux
#include <algorithm>
#include <cassert>
#include <cmath>
#include <functional>
#include <iostream>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>
#include "bayesnet/utils/TensorUtils.h"
namespace bayesnet {

// --------------------------------------
// Constructor
// --------------------------------------
XSp2de::XSp2de(int spIndex1, int spIndex2)
    : Classifier(Network())
    , superParent1_{ spIndex1 }
    , superParent2_{ spIndex2 }
    , nFeatures_{ 0 }
    , statesClass_{ 0 }
    , alpha_{ 1.0 }
    , initializer_{ 1.0 }
    , semaphore_{ CountingSemaphore::getInstance() }
{
    validHyperparameters = { "parent1", "parent2" };
}

// --------------------------------------
// setHyperparameters
// --------------------------------------
void XSp2de::setHyperparameters(const nlohmann::json &hyperparameters_)
{
    auto hyperparameters = hyperparameters_;
    if (hyperparameters.contains("parent1")) {
        superParent1_ = hyperparameters["parent1"];
        hyperparameters.erase("parent1");
    }
    if (hyperparameters.contains("parent2")) {
        superParent2_ = hyperparameters["parent2"];
        hyperparameters.erase("parent2");
    }
    // Hand off anything else to the base Classifier
    Classifier::setHyperparameters(hyperparameters);
}
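
// Hedged usage sketch (illustrative only; assumes the public API shown above):
// the superparents can be set at construction or overridden as hyperparameters.
// @code
//   bayesnet::XSp2de clf(0, 1);
//   clf.setHyperparameters(nlohmann::json{ { "parent1", 2 }, { "parent2", 3 } });
// @endcode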

// --------------------------------------
// fitx
// --------------------------------------
void XSp2de::fitx(torch::Tensor & X, torch::Tensor & y,
                  torch::Tensor & weights_, const Smoothing_t smoothing)
{
    m = X.size(1); // number of samples
    n = X.size(0); // number of features
    dataset = X;

    // Append the class labels as the last row of the dataset
    buildDataset(y);

    // Construct the data structures needed for counting
    buildModel(weights_);

    // Accumulate counts and convert them to probabilities
    trainModel(weights_, smoothing);
    fitted = true;
}
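
// Hedged shape sketch (assumed from the code above: X is features x samples,
// y and weights_ hold one entry per sample):
// @code
//   auto X = torch::randint(0, 3, { n_features, n_samples }, torch::kInt32);
//   auto y = torch::randint(0, 2, { n_samples }, torch::kInt32);
//   auto w = torch::full({ n_samples }, 1.0, torch::kDouble);
//   clf.fitx(X, y, w, bayesnet::Smoothing_t::LAPLACE);
// @endcode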

// --------------------------------------
// buildModel
// --------------------------------------
void XSp2de::buildModel(const torch::Tensor &weights)
{
    nFeatures_ = n;

    // Derive the number of states for each feature from the dataset:
    // states_[f] = max value observed in feature f + 1. This assumes the
    // data are label-encoded from 0 upward; adapt it if yours are not.
    states_.resize(nFeatures_);
    for (int f = 0; f < nFeatures_; f++) {
        states_[f] = dataset[f].max().item<int>() + 1;
    }
    // Class states:
    statesClass_ = dataset[-1].max().item<int>() + 1;

    // Initialize the class counts
    classCounts_.resize(statesClass_, 0.0);

    // For sp1 -> p(sp1Val | c)
    sp1FeatureCounts_.resize(states_[superParent1_] * statesClass_, 0.0);

    // For sp2 -> p(sp2Val | c)
    sp2FeatureCounts_.resize(states_[superParent2_] * statesClass_, 0.0);

    // For each child feature we store p(childVal | c, sp1Val, sp2Val).
    // childCounts_ holds the raw counts in one flat vector, so each
    // feature needs an offset into it.
    childOffsets_.resize(nFeatures_, -1);

    int totalSize = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_) {
            // skip the superparents
            childOffsets_[f] = -1;
            continue;
        }
        childOffsets_[f] = totalSize;
        // Block size for a single child f:
        // states_[f] * statesClass_ * states_[superParent1_] * states_[superParent2_]
        totalSize += (states_[f] * statesClass_
                      * states_[superParent1_]
                      * states_[superParent2_]);
    }
    childCounts_.resize(totalSize, 0.0);
}
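
namespace {
// Hedged compile-time check with hypothetical cardinalities (not taken from
// any dataset): one child block holds states_[f] * statesClass_ *
// states_[sp1] * states_[sp2] entries, so two children of cardinality 2 with
// statesClass_ = 2 and both superparent cardinalities = 3 need 36 + 36 slots.
constexpr int childBlock(int fCard, int nClasses, int sp1Card, int sp2Card)
{
    return fCard * nClasses * sp1Card * sp2Card;
}
static_assert(childBlock(2, 2, 3, 3) + childBlock(2, 2, 3, 3) == 72,
              "two child blocks of 36 entries each");
} // namespace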

// --------------------------------------
// trainModel
// --------------------------------------
void XSp2de::trainModel(const torch::Tensor &weights,
                        const bayesnet::Smoothing_t smoothing)
{
    // Accumulate raw counts
    for (int i = 0; i < m; i++) {
        std::vector<int> instance(nFeatures_ + 1);
        for (int f = 0; f < nFeatures_; f++) {
            instance[f] = dataset[f][i].item<int>();
        }
        instance[nFeatures_] = dataset[-1][i].item<int>(); // class
        double w = weights[i].item<double>();
        addSample(instance, w);
    }

    // Choose alpha based on the smoothing strategy:
    switch (smoothing) {
        case bayesnet::Smoothing_t::ORIGINAL:
            alpha_ = 1.0 / m;
            break;
        case bayesnet::Smoothing_t::LAPLACE:
            alpha_ = 1.0;
            break;
        default:
            alpha_ = 0.0; // no smoothing
    }

    // Large scaling factor applied in predict_proba so that the running
    // product of many small probabilities does not underflow to zero.
    initializer_ = std::numeric_limits<double>::max() / (nFeatures_ * nFeatures_);

    // Convert raw counts to probabilities
    computeProbabilities();
}
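
// Hedged numeric check of the smoothing choices above (example value): with
// m = 4 samples, ORIGINAL gives alpha = 1/m = 0.25, while LAPLACE is always 1.
static_assert(1.0 / 4 == 0.25, "ORIGINAL smoothing with m = 4");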

// --------------------------------------
// addSample
// --------------------------------------
void XSp2de::addSample(const std::vector<int> &instance, double weight)
{
    if (weight <= 0.0)
        return;

    int c = instance.back();
    // increment the class counts
    classCounts_[c] += weight;

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // p(sp1 | c)
    sp1FeatureCounts_[sp1Val * statesClass_ + c] += weight;

    // p(sp2 | c)
    sp2FeatureCounts_[sp2Val * statesClass_ + c] += weight;

    // p(childVal | c, sp1Val, sp2Val)
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int childVal = instance[f];
        int offset = childOffsets_[f];
        // Block layout:
        //   offset + sp1Val * (states_[superParent2_] * states_[f] * statesClass_)
        //          + sp2Val * (states_[f] * statesClass_)
        //          + childVal * statesClass_
        //          + c
        int blockSizeSp2 = states_[superParent2_]
                           * states_[f]
                           * statesClass_;
        int blockSizeChild = states_[f] * statesClass_;

        int idx = offset
                  + sp1Val * blockSizeSp2
                  + sp2Val * blockSizeChild
                  + childVal * statesClass_
                  + c;
        childCounts_[idx] += weight;
    }
}
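
namespace {
// Hedged worked example of the flat index computed in addSample, with
// hypothetical cardinalities sp2Card = 2, fCard = 3, statesClass_ = 2:
constexpr int exampleIdx(int offset, int sp1Val, int sp2Val, int childVal,
                         int c, int sp2Card, int fCard, int nClasses)
{
    return offset + sp1Val * (sp2Card * fCard * nClasses)
           + sp2Val * (fCard * nClasses) + childVal * nClasses + c;
}
// (sp1Val = 1, sp2Val = 0, childVal = 2, c = 1) -> 12 + 0 + 4 + 1 = 17
static_assert(exampleIdx(0, 1, 0, 2, 1, 2, 3, 2) == 17, "flat index layout");
} // namespace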

// --------------------------------------
// computeProbabilities
// --------------------------------------
void XSp2de::computeProbabilities()
{
    double totalCount = std::accumulate(classCounts_.begin(),
                                        classCounts_.end(), 0.0);

    // classPriors_
    classPriors_.resize(statesClass_, 0.0);
    if (totalCount <= 0.0) {
        // fallback => uniform
        double unif = 1.0 / static_cast<double>(statesClass_);
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] = unif;
        }
    } else {
        for (int c = 0; c < statesClass_; c++) {
            classPriors_[c] =
                (classCounts_[c] + alpha_)
                / (totalCount + alpha_ * statesClass_);
        }
    }

    // p(sp1Val | c)
    sp1FeatureProbs_.resize(sp1FeatureCounts_.size());
    int sp1Card = states_[superParent1_];
    for (int spVal = 0; spVal < sp1Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp1Card;
            double num = sp1FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp1FeatureProbs_[spVal * statesClass_ + c] =
                (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(sp2Val | c)
    sp2FeatureProbs_.resize(sp2FeatureCounts_.size());
    int sp2Card = states_[superParent2_];
    for (int spVal = 0; spVal < sp2Card; spVal++) {
        for (int c = 0; c < statesClass_; c++) {
            double denom = classCounts_[c] + alpha_ * sp2Card;
            double num = sp2FeatureCounts_[spVal * statesClass_ + c] + alpha_;
            sp2FeatureProbs_[spVal * statesClass_ + c] =
                (denom <= 0.0 ? 0.0 : num / denom);
        }
    }

    // p(childVal | c, sp1Val, sp2Val)
    childProbs_.resize(childCounts_.size());
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int fCard = states_[f];
        int sp1Card_ = states_[superParent1_];
        int sp2Card_ = states_[superParent2_];
        int childBlockSizeSp2 = sp2Card_ * fCard * statesClass_;
        int childBlockSizeF = fCard * statesClass_;

        int blockSize = fCard * sp1Card_ * sp2Card_ * statesClass_;
        for (int sp1Val = 0; sp1Val < sp1Card_; sp1Val++) {
            for (int sp2Val = 0; sp2Val < sp2Card_; sp2Val++) {
                // The denominator of p(childVal | c, sp1Val, sp2Val) is the
                // count of (sp1Val, sp2Val, c) regardless of childVal, so we
                // sum the childVal dimension once per class state.
                std::vector<double> sumSp1Sp2C(statesClass_, 0.0);
                for (int cv = 0; cv < fCard; cv++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset
                                  + sp1Val * childBlockSizeSp2
                                  + sp2Val * childBlockSizeF
                                  + cv * statesClass_ + c;
                        sumSp1Sp2C[c] += childCounts_[idx];
                    }
                }
                for (int childVal = 0; childVal < fCard; childVal++) {
                    for (int c = 0; c < statesClass_; c++) {
                        int idx = offset
                                  + sp1Val * childBlockSizeSp2
                                  + sp2Val * childBlockSizeF
                                  + childVal * statesClass_
                                  + c;
                        double num = childCounts_[idx] + alpha_;
                        double denom = sumSp1Sp2C[c] + alpha_ * fCard;
                        childProbs_[idx] = (denom <= 0.0 ? 0.0 : num / denom);
                    }
                }
            }
        }
        offset += blockSize;
    }
}
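
namespace {
// Hedged numeric check of the smoothed estimate used above (example values):
// p = (count + alpha) / (parentCount + alpha * cardinality).
constexpr double smoothed(double count, double parent, double alpha, int card)
{
    return (count + alpha) / (parent + alpha * card);
}
// 3 of 10 class-c samples, alpha = 1, binary feature: (3 + 1) / (10 + 2).
static_assert(smoothed(3, 10, 1, 2) == 4.0 / 12.0, "Laplace-smoothed estimate");
} // namespace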

// --------------------------------------
// predict_proba (single instance)
// --------------------------------------
std::vector<double> XSp2de::predict_proba(const std::vector<int> &instance) const
{
    if (!fitted) {
        throw std::logic_error(CLASSIFIER_NOT_FITTED);
    }
    std::vector<double> probs(statesClass_, 0.0);

    int sp1Val = instance[superParent1_];
    int sp2Val = instance[superParent2_];

    // Start with p(c) * p(sp1Val | c) * p(sp2Val | c)
    for (int c = 0; c < statesClass_; c++) {
        double pC = classPriors_[c];
        double pSp1C = sp1FeatureProbs_[sp1Val * statesClass_ + c];
        double pSp2C = sp2FeatureProbs_[sp2Val * statesClass_ + c];
        probs[c] = pC * pSp1C * pSp2C * initializer_;
    }

    // Multiply by each child feature f
    int offset = 0;
    for (int f = 0; f < nFeatures_; f++) {
        if (f == superParent1_ || f == superParent2_)
            continue;

        int valF = instance[f];
        int fCard = states_[f];
        int sp1Card = states_[superParent1_];
        int sp2Card = states_[superParent2_];
        int blockSizeSp2 = sp2Card * fCard * statesClass_;
        int blockSizeF = fCard * statesClass_;

        // base index into childProbs_ for this child and (sp1Val, sp2Val)
        int base = offset
                   + sp1Val * blockSizeSp2
                   + sp2Val * blockSizeF
                   + valF * statesClass_;
        for (int c = 0; c < statesClass_; c++) {
            probs[c] *= childProbs_[base + c];
        }
        offset += (fCard * sp1Card * sp2Card * statesClass_);
    }

    // Normalize
    normalize(probs);
    return probs;
}
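
// Note (hedged): initializer_ scales the running product so that the many
// small factors multiplied above do not underflow to zero; normalize()
// divides the scale back out, so the returned posteriors do not depend on it.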

// --------------------------------------
// predict_proba (batch)
// --------------------------------------
std::vector<std::vector<double>> XSp2de::predict_proba(std::vector<std::vector<int>> &test_data)
{
    int test_size = test_data[0].size(); // each feature is test_data[f], size = #samples
    int sample_size = test_data.size();  // = nFeatures_
    std::vector<std::vector<double>> probabilities(
        test_size, std::vector<double>(statesClass_, 0.0));

    // Same concurrency approach as the other classifiers: bounded chunks,
    // one worker thread per chunk, throttled by the counting semaphore.
    int chunk_size = std::min(150, int(test_size / semaphore_.getMaxCount()) + 1);
    std::vector<std::thread> threads;

    auto worker = [&](const std::vector<std::vector<int>> &samples,
                      int begin,
                      int chunk,
                      int sample_size,
                      std::vector<std::vector<double>> &predictions) {
        std::string threadName =
            "XSp2de-" + std::to_string(begin) + "-" + std::to_string(chunk);
#if defined(__linux__)
        pthread_setname_np(pthread_self(), threadName.c_str());
#else
        pthread_setname_np(threadName.c_str());
#endif

        std::vector<int> instance(sample_size);
        for (int sample = begin; sample < begin + chunk; ++sample) {
            for (int feature = 0; feature < sample_size; ++feature) {
                instance[feature] = samples[feature][sample];
            }
            predictions[sample] = predict_proba(instance);
        }
        semaphore_.release();
    };

    for (int begin = 0; begin < test_size; begin += chunk_size) {
        int chunk = std::min(chunk_size, test_size - begin);
        semaphore_.acquire();
        // std::cref avoids copying the whole test set into every thread;
        // the threads are joined below, so the reference stays valid.
        threads.emplace_back(worker, std::cref(test_data), begin, chunk,
                             sample_size, std::ref(probabilities));
    }
    for (auto &th : threads) {
        th.join();
    }
    return probabilities;
}
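
// Hedged numeric check of the chunking rule above (example sizes): with 1000
// test samples and a semaphore that admits 8 threads, chunks are capped at
// min(150, 1000 / 8 + 1) = 126 samples.
static_assert(std::min(150, 1000 / 8 + 1) == 126, "chunk size example");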

// --------------------------------------
// predict (single instance)
// --------------------------------------
int XSp2de::predict(const std::vector<int> &instance) const
{
    auto p = predict_proba(instance);
    return static_cast<int>(
        std::distance(p.begin(), std::max_element(p.begin(), p.end()))
    );
}

// --------------------------------------
// predict (batch of data)
// --------------------------------------
std::vector<int> XSp2de::predict(std::vector<std::vector<int>> &test_data)
{
    auto probabilities = predict_proba(test_data);
    std::vector<int> predictions(probabilities.size(), 0);

    for (size_t i = 0; i < probabilities.size(); i++) {
        predictions[i] = static_cast<int>(
            std::distance(probabilities[i].begin(),
                          std::max_element(probabilities[i].begin(),
                                           probabilities[i].end()))
        );
    }
    return predictions;
}

// --------------------------------------
// predict (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict(X_);
    return torch::tensor(result_v, torch::kInt32);
}

// --------------------------------------
// predict_proba (torch::Tensor version)
// --------------------------------------
torch::Tensor XSp2de::predict_proba(torch::Tensor &X)
{
    auto X_ = TensorUtils::to_matrix(X);
    auto result_v = predict_proba(X_);
    int n_samples = X.size(1);
    torch::Tensor result =
        torch::zeros({ n_samples, statesClass_ }, torch::kDouble);
    for (int i = 0; i < (int)result_v.size(); ++i) {
        result.index_put_({ i, "..." }, torch::tensor(result_v[i]));
    }
    return result;
}

// --------------------------------------
// score (torch::Tensor version)
// --------------------------------------
float XSp2de::score(torch::Tensor &X, torch::Tensor &y)
{
    torch::Tensor y_pred = predict(X);
    return (y_pred == y).sum().item<float>() / y.size(0);
}

// --------------------------------------
// score (vector version)
// --------------------------------------
float XSp2de::score(std::vector<std::vector<int>> &X, std::vector<int> &y)
{
    auto y_pred = predict(X);
    int correct = 0;
    for (size_t i = 0; i < y_pred.size(); ++i) {
        if (y_pred[i] == y[i]) {
            correct++;
        }
    }
    return static_cast<float>(correct) / static_cast<float>(y_pred.size());
}

// --------------------------------------
// Utility: normalize
// --------------------------------------
void XSp2de::normalize(std::vector<double> &v) const
{
    double sum = 0.0;
    for (auto &val : v) {
        sum += val;
    }
    if (sum > 0.0) {
        for (auto &val : v) {
            val /= sum;
        }
    }
}
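
// Hedged worked example: normalize turns {1.0, 1.0, 2.0} into {0.25, 0.25, 0.5};
// an all-zero vector is left untouched to avoid dividing by zero.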

// --------------------------------------
// to_string
// --------------------------------------
std::string XSp2de::to_string() const
{
    std::ostringstream oss;
    oss << "----- XSp2de Model -----\n"
        << "nFeatures_ = " << nFeatures_ << "\n"
        << "superParent1_ = " << superParent1_ << "\n"
        << "superParent2_ = " << superParent2_ << "\n"
        << "statesClass_ = " << statesClass_ << "\n\n";

    oss << "States: [";
    for (auto s : states_) oss << s << " ";
    oss << "]\n";

    oss << "classCounts_:\n";
    for (auto v : classCounts_) oss << v << " ";
    oss << "\nclassPriors_:\n";
    for (auto v : classPriors_) oss << v << " ";
    oss << "\nsp1FeatureCounts_ (size=" << sp1FeatureCounts_.size() << ")\n";
    for (auto v : sp1FeatureCounts_) oss << v << " ";
    oss << "\nsp2FeatureCounts_ (size=" << sp2FeatureCounts_.size() << ")\n";
    for (auto v : sp2FeatureCounts_) oss << v << " ";
    oss << "\nchildCounts_ (size=" << childCounts_.size() << ")\n";
    for (auto v : childCounts_) oss << v << " ";

    oss << "\nchildOffsets_:\n";
    for (auto c : childOffsets_) oss << c << " ";

    oss << "\n----------------------------------------\n";
    return oss.str();
}

// --------------------------------------
// Some introspection about the graph
// --------------------------------------
int XSp2de::getNumberOfNodes() const
{
    // nFeatures_ feature nodes + 1 class node
    return nFeatures_ + 1;
}

int XSp2de::getClassNumStates() const
{
    return statesClass_;
}

int XSp2de::getNFeatures() const
{
    return nFeatures_;
}

int XSp2de::getNumberOfStates() const
{
    // Heuristic: the summed feature cardinalities scaled by the number of
    // features. Adjust this if a strict state count is needed.
    return std::accumulate(states_.begin(), states_.end(), 0) * nFeatures_;
}

int XSp2de::getNumberOfEdges() const
{
    // In an SPnDE with n = 2 superparents the edges are:
    //   class -> sp1, class -> sp2    => 2
    //   class -> child                => nFeatures_ - 2
    //   sp1 -> child, sp2 -> child    => 2 * (nFeatures_ - 2)
    // total = 2 + 3 * (nFeatures_ - 2) = 3 * nFeatures_ - 4
    return 3 * nFeatures_ - 4;
}
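
namespace {
// Hedged check of the edge-count identity above: 2 + 3*(n - 2) == 3n - 4.
constexpr int edges(int n) { return 2 + 3 * (n - 2); }
static_assert(edges(5) == 3 * 5 - 4, "edge count for n = 5 features");
} // namespace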

} // namespace bayesnet