00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/features/TOPFeatures.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/mathematics/Math.h>
00015
00016 using namespace shogun;
00017
00018 CTOPFeatures::CTOPFeatures()
00019 {
00020 init();
00021 }
00022
00023 CTOPFeatures::CTOPFeatures(
00024 int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00025 : CSimpleFeatures<float64_t>(size)
00026 {
00027 init();
00028 neglinear=neglin;
00029 poslinear=poslin;
00030
00031 set_models(p,n);
00032 }
00033
00034 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00035 : CSimpleFeatures<float64_t>(orig)
00036 {
00037 init();
00038 pos=orig.pos;
00039 neg=orig.neg;
00040 neglinear=orig.neglinear;
00041 poslinear=orig.poslinear;
00042 }
00043
00044 CTOPFeatures::~CTOPFeatures()
00045 {
00046 SG_FREE(pos_relevant_indizes.idx_p);
00047 SG_FREE(pos_relevant_indizes.idx_q);
00048 SG_FREE(pos_relevant_indizes.idx_a_cols);
00049 SG_FREE(pos_relevant_indizes.idx_a_rows);
00050 SG_FREE(pos_relevant_indizes.idx_b_cols);
00051 SG_FREE(pos_relevant_indizes.idx_b_rows);
00052
00053 SG_FREE(neg_relevant_indizes.idx_p);
00054 SG_FREE(neg_relevant_indizes.idx_q);
00055 SG_FREE(neg_relevant_indizes.idx_a_cols);
00056 SG_FREE(neg_relevant_indizes.idx_a_rows);
00057 SG_FREE(neg_relevant_indizes.idx_b_cols);
00058 SG_FREE(neg_relevant_indizes.idx_b_rows);
00059
00060 SG_UNREF(pos);
00061 SG_UNREF(neg);
00062 }
00063
00064 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00065 {
00066 ASSERT(p && n);
00067 SG_REF(p);
00068 SG_REF(n);
00069
00070 pos=p;
00071 neg=n;
00072 set_num_vectors(0);
00073
00074 SG_FREE(feature_matrix);
00075 feature_matrix=NULL ;
00076
00077
00078 if (pos && pos->get_observations())
00079 set_num_vectors(pos->get_observations()->get_num_vectors());
00080
00081 compute_relevant_indizes(p, &pos_relevant_indizes);
00082 compute_relevant_indizes(n, &neg_relevant_indizes);
00083 num_features=compute_num_features();
00084
00085 SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00086 }
00087
00088 float64_t* CTOPFeatures::compute_feature_vector(
00089 int32_t num, int32_t &len, float64_t* target)
00090 {
00091 float64_t* featurevector=target;
00092
00093 if (!featurevector)
00094 featurevector=SG_MALLOC(float64_t, get_num_features());
00095
00096 if (!featurevector)
00097 return NULL;
00098
00099 compute_feature_vector(featurevector, num, len);
00100
00101 return featurevector;
00102 }
00103
00104 void CTOPFeatures::compute_feature_vector(
00105 float64_t* featurevector, int32_t num, int32_t& len)
00106 {
00107 int32_t i,j,p=0,x=num;
00108 int32_t idx=0;
00109
00110 float64_t posx=(poslinear) ?
00111 (pos->linear_model_probability(x)) : (pos->model_probability(x));
00112 float64_t negx=(neglinear) ?
00113 (neg->linear_model_probability(x)) : (neg->model_probability(x));
00114
00115 len=get_num_features();
00116
00117 featurevector[p++]=(posx-negx);
00118
00119
00120 if (poslinear)
00121 {
00122 for (i=0; i<pos->get_N(); i++)
00123 {
00124 for (j=0; j<pos->get_M(); j++)
00125 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00126 }
00127 }
00128 else
00129 {
00130 for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00131 featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00132
00133 for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00134 featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00135
00136 for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00137 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00138
00139 for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00140 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154 }
00155
00156
00157 if (neglinear)
00158 {
00159 for (i=0; i<neg->get_N(); i++)
00160 {
00161 for (j=0; j<neg->get_M(); j++)
00162 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00163 }
00164 }
00165 else
00166 {
00167 for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00168 featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00169
00170 for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00171 featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00172
00173 for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00174 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00175
00176 for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00177 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00178
00179
00180
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190 }
00191 }
00192
00193 float64_t* CTOPFeatures::set_feature_matrix()
00194 {
00195 int32_t len=0;
00196
00197 num_features=get_num_features();
00198 ASSERT(num_features);
00199 ASSERT(pos);
00200 ASSERT(pos->get_observations());
00201
00202 num_vectors=pos->get_observations()->get_num_vectors();
00203 SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00204 SG_FREE(feature_matrix);
00205 feature_matrix=SG_MALLOC(float64_t, num_features*num_vectors);
00206 if (!feature_matrix)
00207 {
00208 SG_ERROR( "allocation not successful!");
00209 return NULL ;
00210 } ;
00211
00212 SG_INFO( "calculating top feature matrix\n");
00213
00214 for (int32_t x=0; x<num_vectors; x++)
00215 {
00216 if (!(x % (num_vectors/10+1)))
00217 SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00218 else if (!(x % (num_vectors/200+1)))
00219 SG_DEBUG( ".");
00220
00221 compute_feature_vector(&feature_matrix[x*num_features], x, len);
00222 }
00223
00224 SG_DONE();
00225
00226 num_vectors=get_num_vectors() ;
00227 num_features=get_num_features() ;
00228
00229 return feature_matrix;
00230 }
00231
00232 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00233 {
00234 int32_t i=0;
00235 int32_t j=0;
00236
00237 hmm_idx->num_p=0;
00238 hmm_idx->num_q=0;
00239 hmm_idx->num_a=0;
00240 hmm_idx->num_b=0;
00241
00242 for (i=0; i<hmm->get_N(); i++)
00243 {
00244 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00245 hmm_idx->num_p++;
00246
00247 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00248 hmm_idx->num_q++;
00249
00250 for (j=0; j<hmm->get_N(); j++)
00251 {
00252 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00253 hmm_idx->num_a++;
00254 }
00255
00256 for (j=0; j<pos->get_M(); j++)
00257 {
00258 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00259 hmm_idx->num_b++;
00260 }
00261 }
00262
00263 if (hmm_idx->num_p > 0)
00264 {
00265 hmm_idx->idx_p=SG_MALLOC(int32_t, hmm_idx->num_p);
00266 ASSERT(hmm_idx->idx_p);
00267 }
00268
00269 if (hmm_idx->num_q > 0)
00270 {
00271 hmm_idx->idx_q=SG_MALLOC(int32_t, hmm_idx->num_q);
00272 ASSERT(hmm_idx->idx_q);
00273 }
00274
00275 if (hmm_idx->num_a > 0)
00276 {
00277 hmm_idx->idx_a_rows=SG_MALLOC(int32_t, hmm_idx->num_a);
00278 hmm_idx->idx_a_cols=SG_MALLOC(int32_t, hmm_idx->num_a);
00279 ASSERT(hmm_idx->idx_a_rows);
00280 ASSERT(hmm_idx->idx_a_cols);
00281 }
00282
00283 if (hmm_idx->num_b > 0)
00284 {
00285 hmm_idx->idx_b_rows=SG_MALLOC(int32_t, hmm_idx->num_b);
00286 hmm_idx->idx_b_cols=SG_MALLOC(int32_t, hmm_idx->num_b);
00287 ASSERT(hmm_idx->idx_b_rows);
00288 ASSERT(hmm_idx->idx_b_cols);
00289 }
00290
00291
00292 int32_t idx_p=0;
00293 int32_t idx_q=0;
00294 int32_t idx_a=0;
00295 int32_t idx_b=0;
00296
00297 for (i=0; i<hmm->get_N(); i++)
00298 {
00299 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00300 {
00301 ASSERT(idx_p < hmm_idx->num_p);
00302 hmm_idx->idx_p[idx_p++]=i;
00303 }
00304
00305 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00306 {
00307 ASSERT(idx_q < hmm_idx->num_q);
00308 hmm_idx->idx_q[idx_q++]=i;
00309 }
00310
00311 for (j=0; j<hmm->get_N(); j++)
00312 {
00313 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00314 {
00315 ASSERT(idx_a < hmm_idx->num_a);
00316 hmm_idx->idx_a_rows[idx_a]=i;
00317 hmm_idx->idx_a_cols[idx_a++]=j;
00318 }
00319 }
00320
00321 for (j=0; j<pos->get_M(); j++)
00322 {
00323 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00324 {
00325 ASSERT(idx_b < hmm_idx->num_b);
00326 hmm_idx->idx_b_rows[idx_b]=i;
00327 hmm_idx->idx_b_cols[idx_b++]=j;
00328 }
00329 }
00330 }
00331
00332 return true;
00333 }
00334
00335 int32_t CTOPFeatures::compute_num_features()
00336 {
00337 int32_t num=0;
00338
00339 if (pos && neg)
00340 {
00341 num+=1;
00342
00343 if (poslinear)
00344 num+=pos->get_N()*pos->get_M();
00345 else
00346 {
00347 num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00348 }
00349
00350 if (neglinear)
00351 num+=neg->get_N()*neg->get_M();
00352 else
00353 {
00354 num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00355 }
00356
00357
00358
00359
00360 }
00361 return num;
00362 }
00363
00364 void CTOPFeatures::init()
00365 {
00366 pos = NULL;
00367 neg = NULL;
00368 neglinear = false;
00369 poslinear = false;
00370
00371 memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00372 memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00373
00374 unset_generic();
00375
00376
00377
00378 m_parameters->add(&neglinear, "neglinear", "If negative HMM is a LinearHMM");
00379 m_parameters->add(&poslinear, "poslinear", "If positive HMM is a LinearHMM");
00380 }