77         SG_ERROR(
"label type supplied is not supported\n")
 
  101     REQUIRE(data, 
"Data required for classification in apply_multiclass\n")
 
  113     REQUIRE(data, 
"Data required for classification in apply_multiclass\n")
 
  140             root=
dynamic_cast<bnode_t*
>(element);
 
  159         root=
dynamic_cast<bnode_t*
>(element);
 
  161         SG_ERROR(
"%d element is NULL\n",min_index);
 
  210     REQUIRE(folds>1,
"Number of folds is expected to be greater than 1. Supplied value is %d\n",folds)
 
  221     REQUIRE(depth>0,
"Max allowed tree depth should be greater than 0. Supplied value is %d\n",depth)
 
  232     REQUIRE(nsize>0,
"Min allowed node size should be greater than 0. Supplied value is %d\n",nsize)
 
  238     REQUIRE(ep>=0,
"Input epsilon value is expected to be greater than or equal to 0\n")
 
  244     REQUIRE(data,
"Data required for training\n")
 
  253                     " number of vectors in data (presently %d)",
m_weights.
vlen,num_vectors)
 
  265             "be same as number of features in data (presently %d)",
m_nominal.
vlen,num_features)
 
  269         SG_WARNING(
"Feature types are not specified. All features are considered as continuous in training")
 
  287     REQUIRE(labels,
"labels have to be supplied\n");
 
  288     REQUIRE(data,
"data matrix has to be supplied\n");
 
  302                 for (int32_t i=0;i<labels_vec.
vlen;i++)
 
  303                     sum+=labels_vec[i]*weights[i];
 
  308                 node->
data.node_label=sum/tot;
 
  309                 node->
data.total_weight=tot;
 
  318                 int32_t 
max=weights[0];
 
  321                 int32_t c=weights[0];
 
  322                 for (int32_t i=1;i<lab.
vlen;i++)
 
  324                     if (lab[i]==lab[i-1])
 
  346                 node->
data.node_label=lab[maxi];
 
  349                 node->
data.total_weight=weights.
sum(weights);
 
  350                 node->
data.weight_minus_node=node->
data.total_weight-
max;
 
  354             SG_ERROR(
"mode should be either PT_MULTICLASS or PT_REGRESSION\n");
 
  361         node->
data.num_leaves=1;
 
  362         node->
data.weight_minus_branch=node->
data.weight_minus_node;
 
  369         node->
data.num_leaves=1;
 
  370         node->
data.weight_minus_branch=node->
data.weight_minus_node;
 
  381     int32_t num_missing_final=0;
 
  385     int32_t best_attribute=
compute_best_attribute(mat,weights,labels_vec,left,right,left_final,num_missing_final,c_left,c_right);
 
  387     if (best_attribute==-1)
 
  389         node->
data.num_leaves=1;
 
  390         node->
data.weight_minus_branch=node->
data.weight_minus_node;
 
  399     if (num_missing_final>0)
 
  403         for (int32_t i=0;i<num_vecs;i++)
 
  405             if (mat(best_attribute,i)!=
MISSING)
 
  406                 is_left_final[ilf++]=left_final[i];
 
  412     int32_t count_left=0;
 
  413     for (int32_t c=0;c<num_vecs;c++)
 
  414         count_left=(left_final[c])?count_left+1:count_left;
 
  422     for (int32_t c=0;c<num_vecs;c++)
 
  427             weightsl[l++]=weights[c];
 
  432             weightsr[r++]=weights[c];
 
  451     node->
data.attribute_id=best_attribute;
 
  452     node->
left(left_child);
 
  453     node->
right(right_child);
 
  454     left_child->
data.transit_into_values=left_transit;
 
  455     right_child->
data.transit_into_values=right_transit;
 
  456     node->
data.num_leaves=left_child->
data.num_leaves+right_child->
data.num_leaves;
 
  457     node->
data.weight_minus_branch=left_child->
data.weight_minus_branch+right_child->
data.weight_minus_branch;
 
  470     ulabels[0]=labels_vec[sidx[0]];
 
  473     for (int32_t i=1;i<sidx.vlen;i++)
 
  475         if (labels_vec[sidx[i]]<=labels_vec[sidx[start]]+delta)
 
  479         ulabels[n_ulabels]=labels_vec[sidx[i]];
 
  488     int32_t &count_right)
 
  505     total_wclasses.
zero();
 
  508     for (int32_t i=0;i<num_vecs;i++)
 
  510         for (int32_t j=0;j<n_ulabels;j++)
 
  512             if (
CMath::abs(labels_vec[i]-ulabels[j])<=delta)
 
  515                 total_wclasses[j]+=weights[i];
 
  522     int32_t best_attribute=-1;
 
  524     for (int32_t i=0;i<num_feats;i++)
 
  527         for (int32_t j=0;j<num_vecs;j++)
 
  534         int32_t n_nm_vecs=feats.
vlen;
 
  535         while (feats[sorted_args[n_nm_vecs-1]]==
MISSING)
 
  537             total_wclasses[simple_labels[sorted_args[n_nm_vecs-1]]]-=weights[sorted_args[n_nm_vecs-1]];
 
  542         if (feats[sorted_args[n_nm_vecs-1]]<=feats[sorted_args[0]]+
EQ_DELTA)
 
  551             simple_feats[sorted_args[0]]=0;
 
  553             for (int32_t j=1;j<n_nm_vecs;j++)
 
  555                 if (feats[sorted_args[j]]==feats[sorted_args[j-1]])
 
  556                     simple_feats[sorted_args[j]]=c;
 
  558                     simple_feats[sorted_args[j]]=(++c);
 
  562             ufeats[0]=feats[sorted_args[0]];
 
  564             for (int32_t j=1;j<n_nm_vecs;j++)
 
  566                 if (feats[sorted_args[j]]==feats[sorted_args[j-1]])
 
  569                     ufeats[++u]=feats[sorted_args[j]];
 
  574             for (int32_t k=1;k<num_cases;k++)
 
  589                 for (int32_t p=0;p<c+1;p++)
 
  593                 for (int32_t j=0;j<n_nm_vecs;j++)
 
  595                     is_left[sorted_args[j]]=feats_left[simple_feats[sorted_args[j]]];
 
  596                     if (is_left[sorted_args[j]])
 
  597                         wleft[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
 
  599                         wright[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
 
  604                     g=
gain(wleft,wright,total_wclasses);
 
  606                     g=
gain(wleft,wright,total_wclasses,ulabels);
 
  608                     SG_ERROR(
"Undefined problem statement\n");
 
  615                     num_missing_final=num_vecs-n_nm_vecs;
 
  618                     for (int32_t l=0;l<c+1;l++)
 
  619                         count_left=(feats_left[l])?count_left+1:count_left;
 
  621                     count_right=c+1-count_left;
 
  625                     for (int32_t w=0;w<c+1;w++)
 
  630                             right[r++]=ufeats[w];
 
  640             left_wclasses.
zero();
 
  645             right_wclasses[simple_labels[sorted_args[0]]]-=weights[sorted_args[0]];
 
  646             left_wclasses[simple_labels[sorted_args[0]]]+=weights[sorted_args[0]];
 
  647             for (int32_t j=1;j<n_nm_vecs;j++)
 
  649                 if (feats[sorted_args[j]]<=z+
EQ_DELTA)
 
  651                     right_wclasses[simple_labels[sorted_args[j]]]-=weights[sorted_args[j]];
 
  652                     left_wclasses[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
 
  659                     g=
gain(left_wclasses,right_wclasses,total_wclasses);
 
  661                     g=
gain(left_wclasses,right_wclasses,total_wclasses,ulabels);
 
  663                     SG_ERROR(
"Undefined problem statement\n");
 
  670                     num_missing_final=num_vecs-n_nm_vecs;
 
  673                 z=feats[sorted_args[j]];
 
  674                 if (feats[sorted_args[n_nm_vecs-1]]<=z+
EQ_DELTA)
 
  677                 right_wclasses[simple_labels[sorted_args[j]]]-=weights[sorted_args[j]];
 
  678                 left_wclasses[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
 
  683         while (n_nm_vecs<feats.
vlen)
 
  685             total_wclasses[simple_labels[sorted_args[n_nm_vecs-1]]]+=weights[sorted_args[n_nm_vecs-1]];
 
  690     if (best_attribute==-1)
 
  695         left[0]=best_threshold;
 
  696         right[0]=best_threshold;
 
  699         for (int32_t i=0;i<num_vecs;i++)
 
  700             is_left_final[i]=(mat(best_attribute,i)<=best_threshold);
 
  703     return best_attribute;
 
  795     int32_t num_unique=feats.
unique(feats.vector,feats.vlen);
 
  799     for (int32_t j=0;j<num_unique-1;j++)
 
  820             lambda=(p-(1-numer/denom))/p;
 
  822             lambda=(p-(1-numerc/denom))/p;
 
  852     int32_t num_unique=feats.
unique(feats.vector,feats.vlen);
 
  855     int32_t num_cases=
CMath::pow(2,(num_unique-1));
 
  856     for (int32_t j=1;j<num_cases;j++)
 
  859         for (int32_t k=0;k<num_unique;k++)
 
  865             for (int32_t q=0;q<num_unique;q++)
 
  867                 if (feats[q]==m(attr,intersect_vecs->
get_element(k)))
 
  869                     intersect_vecs_left[k]=feats_left[q];
 
  880             if (intersect_vecs_left[k]==is_left[intersect_vecs->
get_element(k)])
 
  889             lambda=(p-(1-numer/denom))/p;
 
  891             lambda=(p-(1-numerc/denom))/p;
 
  901                 for (int32_t q=0;q<num_unique;q++)
 
  903                     if (feats[q]==m(attr,missing_vecs->
get_element(k)))
 
  906                             is_left[missing_vecs->
get_element(k)]=feats_left[q];
 
  908                             is_left[missing_vecs->
get_element(k)]=!feats_left[q];
 
  929     return lsd_n-(lsd_l*(total_lweight/total_weight))-(lsd_r*(total_rweight/total_weight));
 
  941     return gini_n-(gini_l*(total_lweight/total_weight))-(gini_r*(total_rweight/total_weight));
 
  948     for (int32_t i=0;i<weighted_lab_classes.
vlen;i++)
 
  950         total_weight+=weighted_lab_classes[i];
 
  951         gini+=weighted_lab_classes[i]*weighted_lab_classes[i];
 
  954     gini=1.0-(gini/(total_weight*total_weight));
 
  962     for (int32_t i=0;i<weights.
vlen;i++)
 
  964         mean+=feats[i]*weights[i];
 
  965         total_weight+=weights[i];
 
  970     for (int32_t i=0;i<weights.
vlen;i++)
 
  971         dev+=weights[i]*(feats[i]-mean)*(feats[i]-mean);
 
  973     return dev/total_weight;
 
  980     for (int32_t i=0;i<num_vecs;i++)
 
  987         while(node->
data.num_leaves!=1)
 
  995                 for (int32_t k=0;k<comp.
vlen;k++)
 
  997                     if (comp[k]==sample[node->
data.attribute_id])
 
 1018                 if (sample[node->
data.attribute_id]<=leftchild->
data.transit_into_values[0])
 
 1034         labels[i]=node->
data.node_label;
 
 1053             SG_ERROR(
"mode should be either PT_MULTICLASS or PT_REGRESSION\n");
 
 1071     for (int32_t i=0;i<folds;i++)
 
 1076         for (int32_t j=0;j<num_vecs;j++)
 
 1086             SG_ERROR(
"Unfortunately you have reached the very low probability event where atleast one of " 
 1087                     "the subsets in cross-validation is not represented at all. Please re-run.")
 
 1121             if (jth_element!=NULL)
 
 1122                 current_root=
dynamic_cast<bnode_t*
>(jth_element);
 
 1124                 SG_ERROR(
"%d element is NULL which should not be",j);
 
 1145     int32_t min_index=-1;
 
 1157         for (int32_t j=0;j<folds;j++)
 
 1160             for (int32_t k=base;k<num_alphak[j]+base-1;k++)
 
 1173             base+=num_alphak[j];
 
 1186         best_tree_root=
dynamic_cast<bnode_t*
>(element);
 
 1188         SG_ERROR(
"%d element is NULL which should not be",min_index);
 
 1200     REQUIRE(labels,
"input labels cannot be NULL");
 
 1201     REQUIRE(reference,
"reference labels cannot be NULL")
 
 1212             for (int32_t i=0;i<weights.
vlen;i++)
 
 1223             for (int32_t i=0;i<weights.
vlen;i++)
 
 1250         t1_root=
dynamic_cast<bnode_t*
>(t1root);
 
 1252         SG_ERROR(
"t1_root is NULL. This is not expected\n")
 
 1256     while(t1_root->
data.num_leaves>1)
 
 1264             t2_root=
dynamic_cast<bnode_t*
>(t2root);
 
 1266             SG_ERROR(
"t1_root is NULL. This is not expected\n")
 
 1286     if (node->
data.num_leaves!=1)
 
 1294         weak_links[2]=(node->
data.weight_minus_node-node->
data.weight_minus_branch)/node->
data.total_weight;
 
 1295         weak_links[2]/=(node->
data.num_leaves-1.0);
 
 1307     if (node->
data.num_leaves==1)
 
 1311     g/=(node->
data.num_leaves-1.0);
 
 1314         node->
data.num_leaves=1;
 
 1315         node->
data.weight_minus_branch=node->
data.weight_minus_node;
 
 1327         node->
data.num_leaves=left->
data.num_leaves+right->
data.num_leaves;
 
 1328         node->
data.weight_minus_branch=left->
data.weight_minus_branch+right->
data.weight_minus_branch;
 
 1337     if (node->
data.num_leaves!=1)
 
 1345         node->
data.num_leaves=left->
data.num_leaves+right->
data.num_leaves;
 
 1346         node->
data.weight_minus_branch=left->
data.weight_minus_branch+right->
data.weight_minus_branch;
 
 1347         if (node->
data.weight_minus_node==node->
data.weight_minus_branch)
 
 1349             node->
data.num_leaves=1;
 
CLabels * apply_from_current_node(CDenseFeatures< float64_t > *feats, bnode_t *current)
The node of the tree structure forming a TreeMachine The node contains pointer to its parent and poin...
int32_t get_max_depth() const 
static void fill_vector(T *vec, int32_t len, T value)
virtual ELabelType get_label_type() const =0
void set_weights(SGVector< float64_t > w)
Real Labels are real-valued labels. 
SGVector< bool > get_feature_types() const 
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
int32_t get_min_node_size() const 
void set_machine_problem_type(EProblemType mode)
int32_t get_num_folds() const 
CDynamicObjectArray * prune_tree(CTreeMachine< CARTreeNodeData > *tree)
The class Labels models labels, i.e. class assignments of objects. 
real valued labels (e.g. for regression, classifier outputs) 
multi-class labels 0,1,... 
virtual CMulticlassLabels * apply_multiclass(CFeatures *data=NULL)
float64_t find_weakest_alpha(bnode_t *node)
float64_t least_squares_deviation(SGVector< float64_t > labels, SGVector< float64_t > weights, float64_t &total_weight)
void form_t1(bnode_t *node)
virtual bool is_label_valid(CLabels *lab) const 
static const float64_t EQ_DELTA
CTreeMachineNode< CARTreeNodeData > * get_root()
SGVector< bool > surrogate_split(SGMatrix< float64_t > data, SGVector< float64_t > weights, SGVector< bool > nm_left, int32_t attr)
virtual bool train_machine(CFeatures *data=NULL)
float64_t get_label(int32_t idx)
float64_t m_label_epsilon
virtual void set_labels(CLabels *lab)
void set_root(CTreeMachineNode< CARTreeNodeData > *root)
static void qsort(T *output, int32_t size)
Multiclass Labels for multi-class classification. 
static bool fequals(const T &a, const T &b, const float64_t eps, bool tolerant=false)
virtual void set_children(CDynamicObjectArray *children)
float64_t gini_impurity_index(SGVector< float64_t > weighted_lab_classes, float64_t &total_weight)
void clear_feature_types()
float64_t gain(SGVector< float64_t > wleft, SGVector< float64_t > wright, SGVector< float64_t > wtotal, SGVector< float64_t > labels)
Class SGObject is the base class of all shogun objects. 
bool set_element(T e, int32_t idx1, int32_t idx2=0, int32_t idx3=0)
virtual int32_t get_num_vectors() const 
void set_min_node_size(int32_t nsize)
void right(CBinaryTreeMachineNode *r)
void handle_missing_vecs_for_continuous_surrogate(SGMatrix< float64_t > m, CDynamicArray< int32_t > *missing_vecs, CDynamicArray< float64_t > *association_index, CDynamicArray< int32_t > *intersect_vecs, SGVector< bool > is_left, SGVector< float64_t > weights, float64_t p, int32_t attr)
void set_num_folds(int32_t folds)
static SGVector< index_t > argsort(SGVector< T > vector)
virtual void remove_subset()
SGVector< float64_t > get_weights() const 
CTreeMachine * clone_tree()
virtual void add_subset(SGVector< index_t > subset)
static T sum(T *vec, int32_t len)
Return sum(vec) 
virtual EFeatureClass get_feature_class() const =0
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
virtual CBinaryTreeMachineNode< CARTreeNodeData > * CARTtrain(CFeatures *data, SGVector< float64_t > weights, CLabels *labels, int32_t level)
void set_max_depth(int32_t depth)
void handle_missing_vecs_for_nominal_surrogate(SGMatrix< float64_t > m, CDynamicArray< int32_t > *missing_vecs, CDynamicArray< float64_t > *association_index, CDynamicArray< int32_t > *intersect_vecs, SGVector< bool > is_left, SGVector< float64_t > weights, float64_t p, int32_t attr)
structure to store data of a node of CART. This can be used as a template type in TreeMachineNode cla...
all of classes and functions are contained in the shogun namespace 
virtual void remove_subset()
void set_feature_types(SGVector< bool > ft)
CBinaryTreeMachineNode< CARTreeNodeData > bnode_t
virtual int32_t compute_best_attribute(SGMatrix< float64_t > mat, SGVector< float64_t > weights, SGVector< float64_t > labels_vec, SGVector< float64_t > left, SGVector< float64_t > right, SGVector< bool > is_left_final, int32_t &num_missing, int32_t &count_left, int32_t &count_right)
The class Features is the base class of all feature objects. 
SGVector< bool > m_nominal
float64_t compute_error(CLabels *labels, CLabels *reference, SGVector< float64_t > weights)
void cut_weakest_link(bnode_t *node, float64_t alpha)
static const float64_t MIN_SPLIT_GAIN
SGVector< T > clone() const 
virtual CRegressionLabels * apply_regression(CFeatures *data=NULL)
int32_t get_num_elements() const 
void prune_using_test_dataset(CDenseFeatures< float64_t > *feats, CLabels *gnd_truth, SGVector< float64_t > weights=SGVector< float64_t >())
CSGObject * get_element(int32_t index) const 
Matrix::Scalar max(Matrix m)
void push_back(CSGObject *e)
class TreeMachine, a base class for tree based multiclass classifiers. This class is derived from CBa...
SGVector< float64_t > m_weights
static float32_t sqrt(float32_t x)
Dense integer or floating point labels. 
CDynamicArray< float64_t > * m_alphas
void left(CBinaryTreeMachineNode *l)
static int32_t unique(T *output, int32_t size)
static int32_t pow(bool x, int32_t n)
static const float64_t MAX_REAL_NUMBER
const T & get_element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const 
virtual void add_subset(SGVector< index_t > subset)
SGVector< float64_t > get_unique_labels(SGVector< float64_t > labels_vec, int32_t &n_ulabels)
void set_label_epsilon(float64_t epsilon)
void prune_by_cross_validation(CDenseFeatures< float64_t > *data, int32_t folds)
static void random_vector(T *vec, int32_t len, T min_value, T max_value)
static const float64_t MISSING