Random Forest¶

A Random Forest is an ensemble learning method that builds multiple decision trees during training. It predicts by applying a combination rule to the outputs of the individual decision trees.

See [Bre01] for a detailed introduction.

Example¶

CDenseFeatures (here 64-bit floats, aka RealFeatures) and CMulticlassLabels are created from the training and test data files.

features_train = RealFeatures(f_feats_train)
features_test = RealFeatures(f_feats_test)
labels_train = MulticlassLabels(f_labels_train)
labels_test = MulticlassLabels(f_labels_test)

features_train = RealFeatures(f_feats_train);
features_test = RealFeatures(f_feats_test);
labels_train = MulticlassLabels(f_labels_train);
labels_test = MulticlassLabels(f_labels_test);

RealFeatures features_train = new RealFeatures(f_feats_train);
RealFeatures features_test = new RealFeatures(f_feats_test);
MulticlassLabels labels_train = new MulticlassLabels(f_labels_train);
MulticlassLabels labels_test = new MulticlassLabels(f_labels_test);

features_train = Modshogun::RealFeatures.new f_feats_train
features_test = Modshogun::RealFeatures.new f_feats_test
labels_train = Modshogun::MulticlassLabels.new f_labels_train
labels_test = Modshogun::MulticlassLabels.new f_labels_test

features_train <- RealFeatures(f_feats_train)
features_test <- RealFeatures(f_feats_test)
labels_train <- MulticlassLabels(f_labels_train)
labels_test <- MulticlassLabels(f_labels_test)

RealFeatures features_train = new RealFeatures(f_feats_train);
RealFeatures features_test = new RealFeatures(f_feats_test);
MulticlassLabels labels_train = new MulticlassLabels(f_labels_train);
MulticlassLabels labels_test = new MulticlassLabels(f_labels_test);

auto features_train = some<CDenseFeatures<float64_t>>(f_feats_train);
auto features_test = some<CDenseFeatures<float64_t>>(f_feats_test);
auto labels_train = some<CMulticlassLabels>(f_labels_train);
auto labels_test = some<CMulticlassLabels>(f_labels_test);

Combination rules to be used for prediction are derived from the CCombinationRule class. Here we create a CMajorityVote instance to be used as the combination rule.

m_vote = MajorityVote()

m_vote = MajorityVote();

MajorityVote m_vote = new MajorityVote();

m_vote = Modshogun::MajorityVote.new 

m_vote <- MajorityVote()

MajorityVote m_vote = new MajorityVote();

auto m_vote = some<CMajorityVote>();

Next, an instance of CRandomForest is created. The parameters provided are the number of randomly chosen attributes to select from and the number of trees (here 1 and 10, respectively).

rand_forest = RandomForest(1, 10)
rand_forest.set_combination_rule(m_vote)
rand_forest.set_labels(labels_train)

rand_forest = RandomForest(1, 10);
rand_forest.set_combination_rule(m_vote);
rand_forest.set_labels(labels_train);

RandomForest rand_forest = new RandomForest(1, 10);
rand_forest.set_combination_rule(m_vote);
rand_forest.set_labels(labels_train);

rand_forest = Modshogun::RandomForest.new 1, 10
rand_forest.set_combination_rule m_vote
rand_forest.set_labels labels_train

rand_forest <- RandomForest(1, 10)
rand_forest$set_combination_rule(m_vote)
rand_forest$set_labels(labels_train)

RandomForest rand_forest = new RandomForest(1, 10);
rand_forest.set_combination_rule(m_vote);
rand_forest.set_labels(labels_train);

auto rand_forest = some<CRandomForest>(1, 10);
rand_forest->set_combination_rule(m_vote);
rand_forest->set_labels(labels_train);

Then we train the random forest and apply it to the test data, which here gives CMulticlassLabels.

rand_forest.train(features_train)
labels_predict = rand_forest.apply_multiclass(features_test)

rand_forest.train(features_train);
labels_predict = rand_forest.apply_multiclass(features_test);

rand_forest.train(features_train);
MulticlassLabels labels_predict = rand_forest.apply_multiclass(features_test);

rand_forest.train features_train
labels_predict = rand_forest.apply_multiclass features_test

rand_forest$train(features_train)
labels_predict <- rand_forest$apply_multiclass(features_test)

rand_forest.train(features_train);
MulticlassLabels labels_predict = rand_forest.apply_multiclass(features_test);

rand_forest->train(features_train);
auto labels_predict = rand_forest->apply_multiclass(features_test);

We can evaluate test performance via, e.g., CMulticlassAccuracy, and also retrieve the “out of bag error”.

acc = MulticlassAccuracy()
oob = rand_forest.get_oob_error(acc)
accuracy = acc.evaluate(labels_predict, labels_test)

acc = MulticlassAccuracy();
oob = rand_forest.get_oob_error(acc);
accuracy = acc.evaluate(labels_predict, labels_test);

MulticlassAccuracy acc = new MulticlassAccuracy();
double oob = rand_forest.get_oob_error(acc);
double accuracy = acc.evaluate(labels_predict, labels_test);

acc = Modshogun::MulticlassAccuracy.new 
oob = rand_forest.get_oob_error acc
accuracy = acc.evaluate labels_predict, labels_test

acc <- MulticlassAccuracy()
oob <- rand_forest$get_oob_error(acc)
accuracy <- acc$evaluate(labels_predict, labels_test)

MulticlassAccuracy acc = new MulticlassAccuracy();
double oob = rand_forest.get_oob_error(acc);
double accuracy = acc.evaluate(labels_predict, labels_test);

auto acc = some<CMulticlassAccuracy>();
auto oob = rand_forest->get_oob_error(acc);
auto accuracy = acc->evaluate(labels_predict, labels_test);

References¶

Wikipedia: Random_forest

Wikipedia: Out-of-bag_error

[Bre01]

Leo Breiman. Random forests. Machine Learning, 45:5–32, 2001.