BayesNet 1.0.7.
Bayesian Network and basic classifiers Library.
Loading...
Searching...
No Matches
Classifier.cc
1// ***************************************************************
2// SPDX-FileCopyrightText: Copyright 2024 Ricardo Montañana Gómez
3// SPDX-FileType: SOURCE
4// SPDX-License-Identifier: MIT
5// ***************************************************************
6
7#include <sstream>
8#include "bayesnet/utils/bayesnetUtils.h"
9#include "Classifier.h"
10
11namespace bayesnet {
12 Classifier::Classifier(Network model) : model(model), m(0), n(0), metrics(Metrics()), fitted(false) {}
13 Classifier& Classifier::build(const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
14 {
15 this->features = features;
16 this->className = className;
17 this->states = states;
18 m = dataset.size(1);
19 n = features.size();
20 checkFitParameters();
21 auto n_classes = states.at(className).size();
22 metrics = Metrics(dataset, features, className, n_classes);
23 model.initialize();
24 buildModel(weights);
25 trainModel(weights, smoothing);
26 fitted = true;
27 return *this;
28 }
29 void Classifier::buildDataset(torch::Tensor& ytmp)
30 {
31 try {
32 auto yresized = torch::transpose(ytmp.view({ ytmp.size(0), 1 }), 0, 1);
33 dataset = torch::cat({ dataset, yresized }, 0);
34 }
35 catch (const std::exception& e) {
36 std::stringstream oss;
37 oss << "* Error in X and y dimensions *\n";
38 oss << "X dimensions: " << dataset.sizes() << "\n";
39 oss << "y dimensions: " << ytmp.sizes();
40 throw std::runtime_error(oss.str());
41 }
42 }
43 void Classifier::trainModel(const torch::Tensor& weights, Smoothing_t smoothing)
44 {
45 model.fit(dataset, weights, features, className, states, smoothing);
46 }
47 // X is nxm where n is the number of features and m the number of samples
48 Classifier& Classifier::fit(torch::Tensor& X, torch::Tensor& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
49 {
50 dataset = X;
51 buildDataset(y);
52 const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
53 return build(features, className, states, weights, smoothing);
54 }
55 // X is nxm where n is the number of features and m the number of samples
56 Classifier& Classifier::fit(std::vector<std::vector<int>>& X, std::vector<int>& y, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
57 {
58 dataset = torch::zeros({ static_cast<int>(X.size()), static_cast<int>(X[0].size()) }, torch::kInt32);
59 for (int i = 0; i < X.size(); ++i) {
60 dataset.index_put_({ i, "..." }, torch::tensor(X[i], torch::kInt32));
61 }
62 auto ytmp = torch::tensor(y, torch::kInt32);
63 buildDataset(ytmp);
64 const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
65 return build(features, className, states, weights, smoothing);
66 }
67 Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const Smoothing_t smoothing)
68 {
69 this->dataset = dataset;
70 const torch::Tensor weights = torch::full({ dataset.size(1) }, 1.0 / dataset.size(1), torch::kDouble);
71 return build(features, className, states, weights, smoothing);
72 }
73 Classifier& Classifier::fit(torch::Tensor& dataset, const std::vector<std::string>& features, const std::string& className, std::map<std::string, std::vector<int>>& states, const torch::Tensor& weights, const Smoothing_t smoothing)
74 {
75 this->dataset = dataset;
76 return build(features, className, states, weights, smoothing);
77 }
78 void Classifier::checkFitParameters()
79 {
80 if (torch::is_floating_point(dataset)) {
81 throw std::invalid_argument("dataset (X, y) must be of type Integer");
82 }
83 if (dataset.size(0) - 1 != features.size()) {
84 throw std::invalid_argument("Classifier: X " + std::to_string(dataset.size(0) - 1) + " and features " + std::to_string(features.size()) + " must have the same number of features");
85 }
86 if (states.find(className) == states.end()) {
87 throw std::invalid_argument("class name not found in states");
88 }
89 for (auto feature : features) {
90 if (states.find(feature) == states.end()) {
91 throw std::invalid_argument("feature [" + feature + "] not found in states");
92 }
93 }
94 }
95 torch::Tensor Classifier::predict(torch::Tensor& X)
96 {
97 if (!fitted) {
98 throw std::logic_error(CLASSIFIER_NOT_FITTED);
99 }
100 return model.predict(X);
101 }
102 std::vector<int> Classifier::predict(std::vector<std::vector<int>>& X)
103 {
104 if (!fitted) {
105 throw std::logic_error(CLASSIFIER_NOT_FITTED);
106 }
107 auto m_ = X[0].size();
108 auto n_ = X.size();
109 std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
110 for (auto i = 0; i < n_; i++) {
111 Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
112 }
113 auto yp = model.predict(Xd);
114 return yp;
115 }
116 torch::Tensor Classifier::predict_proba(torch::Tensor& X)
117 {
118 if (!fitted) {
119 throw std::logic_error(CLASSIFIER_NOT_FITTED);
120 }
121 return model.predict_proba(X);
122 }
123 std::vector<std::vector<double>> Classifier::predict_proba(std::vector<std::vector<int>>& X)
124 {
125 if (!fitted) {
126 throw std::logic_error(CLASSIFIER_NOT_FITTED);
127 }
128 auto m_ = X[0].size();
129 auto n_ = X.size();
130 std::vector<std::vector<int>> Xd(n_, std::vector<int>(m_, 0));
131 // Convert to nxm vector
132 for (auto i = 0; i < n_; i++) {
133 Xd[i] = std::vector<int>(X[i].begin(), X[i].end());
134 }
135 auto yp = model.predict_proba(Xd);
136 return yp;
137 }
138 float Classifier::score(torch::Tensor& X, torch::Tensor& y)
139 {
140 torch::Tensor y_pred = predict(X);
141 return (y_pred == y).sum().item<float>() / y.size(0);
142 }
143 float Classifier::score(std::vector<std::vector<int>>& X, std::vector<int>& y)
144 {
145 if (!fitted) {
146 throw std::logic_error(CLASSIFIER_NOT_FITTED);
147 }
148 return model.score(X, y);
149 }
150 std::vector<std::string> Classifier::show() const
151 {
152 return model.show();
153 }
154 void Classifier::addNodes()
155 {
156 // Add all nodes to the network
157 for (const auto& feature : features) {
158 model.addNode(feature);
159 }
160 model.addNode(className);
161 }
162 int Classifier::getNumberOfNodes() const
163 {
164 // Features does not include class
165 return fitted ? model.getFeatures().size() : 0;
166 }
167 int Classifier::getNumberOfEdges() const
168 {
169 return fitted ? model.getNumEdges() : 0;
170 }
171 int Classifier::getNumberOfStates() const
172 {
173 return fitted ? model.getStates() : 0;
174 }
175 int Classifier::getClassNumStates() const
176 {
177 return fitted ? model.getClassNumStates() : 0;
178 }
179 std::vector<std::string> Classifier::topological_order()
180 {
181 return model.topological_sort();
182 }
183 std::string Classifier::dump_cpt() const
184 {
185 return model.dump_cpt();
186 }
187 void Classifier::setHyperparameters(const nlohmann::json& hyperparameters)
188 {
189 if (!hyperparameters.empty()) {
190 throw std::invalid_argument("Invalid hyperparameters" + hyperparameters.dump());
191 }
192 }
193}