Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pybrush/BrushEstimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,6 @@ def predict_proba(self, X):
feature_names=self.feature_names_,
validation_size=0.0)


prob = self.best_estimator_.program.predict_proba(data)

if self.parameters_.n_classes == 2:
Expand Down
14 changes: 11 additions & 3 deletions src/bandit/bandit.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#include "bandit.h"
#include <typeinfo> // FOR DEBUGGING PURPOSES. TODO: remove it later

namespace Brush {
namespace MAB {
Expand Down Expand Up @@ -35,8 +34,6 @@ Bandit::Bandit(string type, map<string, float> arms_probs) : type(type) {
}

void Bandit::set_bandit() {
// TODO: a flag that is set to true when this function is called. make all
// other methods to raise an error if bandit was not set
if (type == "thompson") {
pbandit = make_unique<ThompsonSamplingBandit>(probabilities);
} else if (type == "dynamic_thompson") {
Expand All @@ -46,6 +43,14 @@ void Bandit::set_bandit() {
} else {
HANDLE_ERROR_THROW("Undefined Selection Operator " + this->type + "\n");
}

bandit_set = true;
}

/// Guard shared by the bandit operations: returns quietly when the bandit was
/// flagged as set and a policy object is present, otherwise reports the
/// problem through HANDLE_ERROR_THROW.
void Bandit::ensure_bandit_set() const {
    // Happy path first: nothing to do when the bandit is ready to use.
    if (bandit_set && pbandit) {
        return;
    }

    HANDLE_ERROR_THROW("Bandit operator is not set. Call set_bandit() before use.\n");
}

string Bandit::get_type() {
Expand Down Expand Up @@ -73,6 +78,7 @@ void Bandit::set_probs(map<string, float> arms_probs) {
}

map<string, float> Bandit::sample_probs(bool update) {
ensure_bandit_set();
map<string, float> new_probs = this->pbandit->sample_probs(update);

// making all probabilities strictly positive
Expand All @@ -88,10 +94,12 @@ map<string, float> Bandit::sample_probs(bool update) {
}

/// Delegates the choice to the underlying bandit implementation and returns
/// its result. The readiness guard runs first and reports an error if the
/// bandit has not been set.
string Bandit::choose() {
    ensure_bandit_set();

    string chosen = pbandit->choose();
    return chosen;
}

/// Forwards an (arm, reward) observation to the underlying bandit
/// implementation.
/// @param arm    Passed through unchanged to the underlying bandit.
/// @param reward The received reward.
void Bandit::update(string arm, float reward) {
    // Refuse to forward anything until the bandit has been set.
    ensure_bandit_set();

    pbandit->update(arm, reward);
}

Expand Down
4 changes: 4 additions & 0 deletions src/bandit/bandit.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ struct Bandit
* @param reward The received reward.
*/
void update(string arm, float reward);

private:
bool bandit_set = false;
void ensure_bandit_set() const;
};

//TODO: serialization should save the type of bandit and its parameters
Expand Down
12 changes: 0 additions & 12 deletions src/bindings/bind_engines.h
Original file line number Diff line number Diff line change
@@ -1,34 +1,23 @@
#include "module.h"
#include "../engine.h"
#include "../engine.cpp"

// TODO: figure out why do I need to include the whole thing (otherwise it gives me symbol errors)
#include "../bandit/bandit.h"
#include "../bandit/bandit_operator.h"
#include "../bandit/dummy.h"
#include "../bandit/thompson.h"

#include "../ind/individual.h"
#include "../ind/individual.cpp"
#include "../vary/variation.h"
#include "../vary/variation.cpp"

#include "../eval/evaluation.h"
#include "../eval/evaluation.cpp"

#include "../pop/population.cpp"
#include "../pop/population.h"

#include "../selection/selection.h"
#include "../selection/selection.cpp"
#include "../selection/selection_operator.h"
#include "../selection/selection_operator.cpp"
#include "../selection/nsga2.h"
#include "../selection/nsga2.cpp"
#include "../selection/lexicase.h"
#include "../selection/lexicase.cpp"

#include "../pop/archive.cpp"
#include "../pop/archive.h"

using Reg = Brush::RegressorEngine;
Expand Down Expand Up @@ -94,7 +83,6 @@ void bind_engine(py::module& m, string name)
},
[](nl::json j) { // __setstate__
T p = j;
// TODO: do I need to get the data and ss reference, then call init for this new instance?
return p;
})
)
Expand Down
1 change: 0 additions & 1 deletion src/bindings/bind_evaluator.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "module.h"
#include "../eval/evaluation.h"
#include "../eval/evaluation.cpp"

namespace py = pybind11;
namespace br = Brush;
Expand Down
6 changes: 0 additions & 6 deletions src/bindings/bind_selection.h
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
#include "module.h"

// TODO: figure out why im having symbol errors (if i dont include the cpp here as well)
#include "../selection/selection.h"
#include "../selection/selection.cpp"
#include "../selection/selection_operator.h"
#include "../selection/selection_operator.cpp"
#include "../selection/nsga2.h"
#include "../selection/nsga2.cpp"
#include "../selection/lexicase.h"
#include "../selection/lexicase.cpp"

#include "../pop/population.cpp"
#include "../pop/population.h"

namespace py = pybind11;
Expand Down
5 changes: 0 additions & 5 deletions src/bindings/bind_variation.h
Original file line number Diff line number Diff line change
@@ -1,22 +1,17 @@
#include "module.h"

#include "../pop/population.h"
#include "../pop/population.cpp"

#include "../bandit/bandit.h"
#include "../bandit/bandit_operator.h"
#include "../bandit/dummy.h"
#include "../bandit/thompson.h"
#include "../ind/individual.h"
#include "../ind/individual.cpp"

#include "../simplification/constants.cpp"
#include "../simplification/constants.h"
#include "../simplification/inexact.cpp"
#include "../simplification/inexact.h"

#include "../vary/variation.h"
#include "../vary/variation.cpp"

namespace py = pybind11;
namespace nl = nlohmann;
Expand Down
18 changes: 7 additions & 11 deletions src/data/data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ void Dataset::init()
back_inserter(validation_data_idx),
[&](int element) { return element; });
}
else if (classification && true) // figuring out training and validation data indexes
else if (classification) // figuring out training and validation data indexes
{ // Stratified split for classification problems. TODO: parameters to change stratify behavior? (and set false by default)
std::map<float, vector<int>> class_indices; // TODO: I think I can remove many std:: from the code..
for (size_t i = 0; i < n_samples; ++i) {
Expand All @@ -335,15 +335,11 @@ void Dataset::init()
std::transform(idx.begin(), idx.begin() + n_train_samples,
back_inserter(training_data_idx),
[&](int element) { return indices[element]; });

if (n_class_samples - n_train_samples == 0)
{
// same indices from the training data to the validation data
std::transform(idx.begin(), idx.begin() + n_train_samples,
back_inserter(validation_data_idx),
[&](int element) { return indices[element]; });
}
else

// stratified split so train/validation never overlap when a class is
// too small. Now, if a class has no remaining samples for validation,
// it contributes only to training (validation gets none for that class).
if (n_class_samples - n_train_samples > 0)
{
std::transform(idx.begin() + n_train_samples, idx.end(),
back_inserter(validation_data_idx),
Expand All @@ -355,7 +351,7 @@ void Dataset::init()
// logic for non-classification problems
vector<size_t> idx(n_samples);

if (shuffle_split) // TODO: make sure this works with multiple threads and fixed random state
if (shuffle_split)
idx = r.shuffled_index(n_samples);
else
std::iota(idx.begin(), idx.end(), 0);
Expand Down
7 changes: 6 additions & 1 deletion src/engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -687,4 +687,9 @@ void Engine<T>::run(Dataset &data)
//When you have tasks that are created at runtime (e.g., subflow,
// cudaFlow), you need to execute the graph first to spawn these tasks and dump the entire graph.
}
}
}

// Explicit instantiation definitions of Engine, one per program type.
// They pair with the `extern template` declarations in engine.h, so the
// Engine member definitions from this file are emitted in this translation
// unit for each of the four program types.
template class Brush::Engine<Brush::ProgramType::Regressor>;
template class Brush::Engine<Brush::ProgramType::BinaryClassifier>;
template class Brush::Engine<Brush::ProgramType::MulticlassClassifier>;
template class Brush::Engine<Brush::ProgramType::Representer>;
6 changes: 5 additions & 1 deletion src/engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,6 @@ class Engine{
/// train the model
void run(Dataset &d);

// TODO: should params and ss be private? (that would require better json handling)
Parameters params; ///< hyperparameters of brush, which the user can interact
SearchSpace ss;

Expand Down Expand Up @@ -161,5 +160,10 @@ NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::BinaryClassifier>, params, best_in
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::MulticlassClassifier>, params, best_ind, archive, pop, ss, is_fitted);
NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Engine<PT::Representer>, params, best_ind, archive, pop, ss, is_fitted);

// Explicit instantiation declarations: translation units that include this
// header will not implicitly instantiate Engine for these program types and
// instead rely on the explicit instantiation definitions in engine.cpp.
extern template class Engine<PT::Regressor>;
extern template class Engine<PT::BinaryClassifier>;
extern template class Engine<PT::MulticlassClassifier>;
extern template class Engine<PT::Representer>;

} // Brush
#endif
7 changes: 6 additions & 1 deletion src/eval/evaluation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,9 @@ void Evaluation<T>::assign_fit(Individual<T>& ind, const Dataset& data,
}

} // Pop
} // Brush
} // Brush

// Explicit instantiation definitions of Evaluation, one per program type.
// They pair with the `extern template` declarations in evaluation.h, so the
// Evaluation member definitions from this file are emitted in this
// translation unit for each of the four program types.
template class Brush::Eval::Evaluation<Brush::ProgramType::Regressor>;
template class Brush::Eval::Evaluation<Brush::ProgramType::BinaryClassifier>;
template class Brush::Eval::Evaluation<Brush::ProgramType::MulticlassClassifier>;
template class Brush::Eval::Evaluation<Brush::ProgramType::Representer>;
5 changes: 5 additions & 0 deletions src/eval/evaluation.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ class Evaluation {
// representation program (TODO: implement)
};

// Explicit instantiation declarations: translation units that include this
// header will not implicitly instantiate Evaluation for these program types
// and instead rely on the explicit instantiation definitions in
// evaluation.cpp.
extern template class Evaluation<PT::Regressor>;
extern template class Evaluation<PT::BinaryClassifier>;
extern template class Evaluation<PT::MulticlassClassifier>;
extern template class Evaluation<PT::Representer>;

} //selection
} //brush
#endif
10 changes: 5 additions & 5 deletions src/eval/metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,
// Assuming y contains binary labels (0 or 1)
int num_instances = y.size();

float eps = 1e-4f; // first we set the loss vector values
float eps = 1e-6f; // first we set the loss vector values
loss.resize(num_instances);
for (int i = 0; i < num_instances; ++i) {
float p = predict_proba(i);
Expand Down Expand Up @@ -182,7 +182,7 @@ float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,

// detect constant prediction case (all p_sorted equal within tolerance).
// because p_sorted is sorted, the first element is the maximum, and the last is the minimum,
if (abs(p_sorted.back() - p_sorted.front()) <= eps) {
if (fabs(p_sorted.back() - p_sorted.front()) <= eps) {
// All predictions are (effectively) constant.
float total_weight = std::accumulate(w_sorted.begin(), w_sorted.end(), 0.0f);

Expand All @@ -193,7 +193,7 @@ float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,

// Find the indexes where prediction changes, so we can treat it as one block
vector<int> unique_indices = {}; // this one will be used to calculate the AUC
set<int> unique_probas = {}; // keep track of unique elements (this wont be used other than that)
set<float> unique_probas = {}; // keep track of unique elements (this wont be used other than that)

for (int i=0; i<p_sorted.size(); ++i)
if (unique_probas.insert(p_sorted.at(i)).second)
Expand Down Expand Up @@ -223,7 +223,7 @@ float average_precision_score(const VectorXf& y, const VectorXf& predict_proba,

// integrate PR curve
float average_precision = 0.0f;
for (size_t i = 0; i < num_instances; ++i) {
for (size_t i = 0; i < precision.size() - 1; ++i) {
average_precision += (recall[i+1] - recall[i]) * precision[i+1];
}

Expand Down Expand Up @@ -342,4 +342,4 @@ float multi_zero_one_loss(const VectorXf& y,
}

} // metrics
} // Brush
} // Brush
7 changes: 2 additions & 5 deletions src/ind/individual.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ namespace Pop{

template<ProgramType T>
class Individual{
public: // TODO: make these private (and work with nlohman json)
public:
Program<T> program; ///< executable data structure

// Store only the information for which there is no getter; size, depth, and complexity can all be obtained with program.<function here>
Expand Down Expand Up @@ -64,7 +64,6 @@ class Individual{
variation = "born";
};

// TODO: replace occurences of program.fit with these (also predict and predict_proba)
Individual<T> &fit(const Dataset& data) {
program.fit(data);
this->is_fitted_ = true;
Expand Down Expand Up @@ -153,7 +152,6 @@ class Individual{
}; /// set parent ids using parents
void set_parents(const vector<unsigned>& parents){ parent_id = parents; }; /// set parent ids using id values

// TODO: use setters and getters instead of accessing it directly
// template<ProgramType T>
// void Individual<T>::set_objectives(const vector<string>& objectives)

Expand Down Expand Up @@ -182,7 +180,6 @@ class Individual{
vector<float> weights;
weights.resize(0);
for (const auto& obj : objectives) {
// TODO: do i need to use find or this can be done directly?
auto it = weightsMap.find(obj);
if (it != weightsMap.end()) {
weights.push_back(it->second);
Expand Down Expand Up @@ -215,7 +212,7 @@ void to_json(json &j, const Individual<T> &p)

template<ProgramType T>
void from_json(const json &j, Individual<T>& p)
{// TODO: figure out if this works with private attributes and try to actually make them private (and use getters and setters)
{
j.at("program").get_to( p.program );
j.at("fitness").get_to( p.fitness );
j.at("id").get_to( p.id );
Expand Down
2 changes: 1 addition & 1 deletion src/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ struct Parameters
unsigned int max_size = 50;

vector<string> objectives{"scorer","linear_complexity"}; // scorer should be generic and deducted based on mode
string bandit = "thompson"; // TODO: should I rename dummy?
string bandit = "thompson";
string sel = "lexicase"; //selection method
string surv = "nsga2"; //survival method
std::unordered_map<string, float> functions;
Expand Down
22 changes: 7 additions & 15 deletions src/pop/archive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,26 +51,14 @@ bool Archive<T>::sortObj1(const Individual<T>& lhs,
}

template<ProgramType T>
bool Archive<T>::sameFitComplexity(const Individual<T>& lhs,
bool Archive<T>::sameObjectives(const Individual<T>& lhs,
const Individual<T>& rhs)
{
// TODO: delete this one

return (lhs.fitness == rhs.fitness);

// fitness' operator== is overloaded to compare wvalues.
// we also check complexity equality to avoid the case where the user
// did not specified complexity as one of the objectives
// return (lhs.fitness == rhs.fitness
// && lhs.fitness.complexity == rhs.fitness.complexity);
}

// TODO: i could get rid of one of these
template<ProgramType T>
bool Archive<T>::sameObjectives(const Individual<T>& lhs,
const Individual<T>& rhs)
{
return (lhs.fitness == rhs.fitness);
}

template<ProgramType T>
Expand Down Expand Up @@ -157,12 +145,16 @@ void Archive<T>::update(Population<T>& pop, const Parameters& params)
std::stable_sort(individuals.begin(), individuals.end(), &sortObj1);
}

/* auto it = std::unique(individuals.begin(),individuals.end(), &sameFitComplexity); */
auto it = std::unique(individuals.begin(),individuals.end(),
&sameObjectives);

individuals.resize(std::distance(individuals.begin(),it));
}

} // Pop
} // Brush
} // Brush

// Explicit instantiation definitions of Archive, one per program type, so
// the Archive member definitions from this file are emitted in this
// translation unit for each of the four program types.
template struct Brush::Pop::Archive<Brush::ProgramType::Regressor>;
template struct Brush::Pop::Archive<Brush::ProgramType::BinaryClassifier>;
template struct Brush::Pop::Archive<Brush::ProgramType::MulticlassClassifier>;
template struct Brush::Pop::Archive<Brush::ProgramType::Representer>;
Loading
Loading