48 REQUIRE(size>0,
"Subset size should be greater than 0. %d supplied!\n",size)
49 m_randsubset_size=size;
67 total_wclasses.
zero();
74 for (int32_t i=0;i<num_vecs;i++)
76 for (int32_t j=0;j<n_ulabels;j++)
78 if (
CMath::abs(labels_vec[i]-ulabels[j])<=delta)
81 total_wclasses[j]+=weights[i];
88 REQUIRE(m_randsubset_size<=num_feats,
"The Feature subset size(set %d) should be less than"
89 " or equal to the total number of features(%d here)\n",m_randsubset_size,num_feats)
92 if (m_randsubset_size==0)
102 int32_t best_attribute=-1;
104 for (int32_t i=0;i<m_randsubset_size;i++)
107 for (int32_t j=0;j<num_vecs;j++)
108 feats[j]=mat(idx[i],j);
114 int32_t n_nm_vecs=feats.
vlen;
115 while (feats[sorted_args[n_nm_vecs-1]]==
MISSING)
117 total_wclasses[simple_labels[sorted_args[n_nm_vecs-1]]]-=weights[sorted_args[n_nm_vecs-1]];
122 if (feats[sorted_args[n_nm_vecs-1]]<=feats[sorted_args[0]]+
EQ_DELTA)
131 simple_feats[sorted_args[0]]=0;
133 for (int32_t j=1;j<n_nm_vecs;j++)
135 if (feats[sorted_args[j]]==feats[sorted_args[j-1]])
136 simple_feats[sorted_args[j]]=c;
138 simple_feats[sorted_args[j]]=(++c);
142 ufeats[0]=feats[sorted_args[0]];
144 for (int32_t j=1;j<n_nm_vecs;j++)
146 if (feats[sorted_args[j]]==feats[sorted_args[j-1]])
149 ufeats[++u]=feats[sorted_args[j]];
154 for (int32_t k=1;k<num_cases;k++)
169 for (int32_t p=0;p<c+1;p++)
173 for (int32_t j=0;j<n_nm_vecs;j++)
175 is_left[sorted_args[j]]=feats_left[simple_feats[sorted_args[j]]];
176 if (is_left[sorted_args[j]])
177 wleft[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
179 wright[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
184 g=
gain(wleft,wright,total_wclasses);
186 g=
gain(wleft,wright,total_wclasses,ulabels);
188 SG_ERROR(
"Undefined problem statement\n");
192 best_attribute=idx[i];
195 num_missing_final=num_vecs-n_nm_vecs;
198 for (int32_t l=0;l<c+1;l++)
199 count_left=(feats_left[l])?count_left+1:count_left;
201 count_right=c+1-count_left;
205 for (int32_t w=0;w<c+1;w++)
210 right[r++]=ufeats[w];
220 left_wclasses.
zero();
225 right_wclasses[simple_labels[sorted_args[0]]]-=weights[sorted_args[0]];
226 left_wclasses[simple_labels[sorted_args[0]]]+=weights[sorted_args[0]];
227 for (int32_t j=1;j<n_nm_vecs;j++)
229 if (feats[sorted_args[j]]<=z+
EQ_DELTA)
231 right_wclasses[simple_labels[sorted_args[j]]]-=weights[sorted_args[j]];
232 left_wclasses[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
239 g=
gain(left_wclasses,right_wclasses,total_wclasses);
241 g=
gain(left_wclasses,right_wclasses,total_wclasses,ulabels);
243 SG_ERROR(
"Undefined problem statement\n");
248 best_attribute=idx[i];
250 num_missing_final=num_vecs-n_nm_vecs;
253 z=feats[sorted_args[j]];
254 if (feats[sorted_args[n_nm_vecs-1]]<=z+
EQ_DELTA)
257 right_wclasses[simple_labels[sorted_args[j]]]-=weights[sorted_args[j]];
258 left_wclasses[simple_labels[sorted_args[j]]]+=weights[sorted_args[j]];
263 while (n_nm_vecs<feats.
vlen)
265 total_wclasses[simple_labels[sorted_args[n_nm_vecs-1]]]+=weights[sorted_args[n_nm_vecs-1]];
270 if (best_attribute==-1)
275 left[0]=best_threshold;
276 right[0]=best_threshold;
279 for (int32_t i=0;i<num_vecs;i++)
280 is_left_final[i]=(mat(best_attribute,i)<=best_threshold);
283 return best_attribute;
286 void CRandomCARTree::init()
void set_feature_subset_size(int32_t size)
void range_fill(T start=0)
static void permute(SGVector< T > v, CRandom *rand=NULL)
static void fill_vector(T *vec, int32_t len, T value)
static const float64_t EQ_DELTA
float64_t m_label_epsilon
float64_t gain(SGVector< float64_t > wleft, SGVector< float64_t > wright, SGVector< float64_t > wtotal, SGVector< float64_t > labels)
static SGVector< index_t > argsort(SGVector< T > vector)
This class implements the Classification And Regression Trees algorithm by Breiman et al. for decision...
All classes and functions are contained in the shogun namespace.
SGVector< bool > m_nominal
static const float64_t MIN_SPLIT_GAIN
virtual int32_t compute_best_attribute(SGMatrix< float64_t > mat, SGVector< float64_t > weights, SGVector< float64_t > labels_vec, SGVector< float64_t > left, SGVector< float64_t > right, SGVector< bool > is_left_final, int32_t &num_missing, int32_t &count_left, int32_t &count_right)
SGVector< T > clone() const
static float32_t sqrt(float32_t x)
static int32_t pow(bool x, int32_t n)
SGVector< float64_t > get_unique_labels(SGVector< float64_t > labels_vec, int32_t &n_ulabels)
static const float64_t MISSING
virtual ~CRandomCARTree()