TOPFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/features/TOPFeatures.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/mathematics/Math.h>
00015 
00016 using namespace shogun;
00017 
00018 CTOPFeatures::CTOPFeatures()
00019 {
00020     init();
00021 }
00022 
00023 CTOPFeatures::CTOPFeatures(
00024     int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
00025 : CSimpleFeatures<float64_t>(size)
00026 {
00027     init();
00028     neglinear=neglin;
00029     poslinear=poslin;
00030 
00031     set_models(p,n);
00032 }
00033 
00034 CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
00035 : CSimpleFeatures<float64_t>(orig)
00036 {
00037     init();
00038     pos=orig.pos;
00039     neg=orig.neg;
00040     neglinear=orig.neglinear;
00041     poslinear=orig.poslinear;
00042 }
00043 
00044 CTOPFeatures::~CTOPFeatures()
00045 {
00046     SG_FREE(pos_relevant_indizes.idx_p);
00047     SG_FREE(pos_relevant_indizes.idx_q);
00048     SG_FREE(pos_relevant_indizes.idx_a_cols);
00049     SG_FREE(pos_relevant_indizes.idx_a_rows);
00050     SG_FREE(pos_relevant_indizes.idx_b_cols);
00051     SG_FREE(pos_relevant_indizes.idx_b_rows);
00052 
00053     SG_FREE(neg_relevant_indizes.idx_p);
00054     SG_FREE(neg_relevant_indizes.idx_q);
00055     SG_FREE(neg_relevant_indizes.idx_a_cols);
00056     SG_FREE(neg_relevant_indizes.idx_a_rows);
00057     SG_FREE(neg_relevant_indizes.idx_b_cols);
00058     SG_FREE(neg_relevant_indizes.idx_b_rows);
00059 
00060     SG_UNREF(pos);
00061     SG_UNREF(neg);
00062 }
00063 
00064 void CTOPFeatures::set_models(CHMM* p, CHMM* n)
00065 {
00066     ASSERT(p && n);
00067     SG_REF(p);
00068     SG_REF(n);
00069 
00070     pos=p; 
00071     neg=n;
00072     set_num_vectors(0);
00073 
00074     SG_FREE(feature_matrix);
00075     feature_matrix=NULL ;
00076 
00077 
00078     if (pos && pos->get_observations())
00079         set_num_vectors(pos->get_observations()->get_num_vectors());
00080 
00081     compute_relevant_indizes(p, &pos_relevant_indizes);
00082     compute_relevant_indizes(n, &neg_relevant_indizes);
00083     num_features=compute_num_features();
00084 
00085     SG_DEBUG( "pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(),num_features) ;
00086 }
00087 
00088 float64_t* CTOPFeatures::compute_feature_vector(
00089     int32_t num, int32_t &len, float64_t* target)
00090 {
00091     float64_t* featurevector=target;
00092 
00093     if (!featurevector) 
00094         featurevector=SG_MALLOC(float64_t, get_num_features());
00095 
00096     if (!featurevector)
00097         return NULL;
00098 
00099     compute_feature_vector(featurevector, num, len);
00100 
00101     return featurevector;
00102 }
00103 
00104 void CTOPFeatures::compute_feature_vector(
00105     float64_t* featurevector, int32_t num, int32_t& len)
00106 {
00107     int32_t i,j,p=0,x=num;
00108     int32_t idx=0;
00109 
00110     float64_t posx=(poslinear) ?
00111         (pos->linear_model_probability(x)) : (pos->model_probability(x));
00112     float64_t negx=(neglinear) ?
00113         (neg->linear_model_probability(x)) : (neg->model_probability(x));
00114 
00115     len=get_num_features();
00116 
00117     featurevector[p++]=(posx-negx);
00118 
00119     //first do positive model
00120     if (poslinear)
00121     {
00122         for (i=0; i<pos->get_N(); i++)
00123         {
00124             for (j=0; j<pos->get_M(); j++)
00125                 featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
00126         }
00127     }
00128     else
00129     {
00130         for (idx=0; idx< pos_relevant_indizes.num_p; idx++)
00131             featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);
00132 
00133         for (idx=0; idx< pos_relevant_indizes.num_q; idx++)
00134             featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);
00135 
00136         for (idx=0; idx< pos_relevant_indizes.num_a; idx++)
00137                 featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);
00138 
00139         for (idx=0; idx< pos_relevant_indizes.num_b; idx++)
00140                 featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);
00141 
00142 
00143         //for (i=0; i<pos->get_N(); i++)
00144         //{
00145         //  featurevector[p++]=exp(pos->model_derivative_p(i, x)-posx);
00146         //  featurevector[p++]=exp(pos->model_derivative_q(i, x)-posx);
00147 
00148         //  for (j=0; j<pos->get_N(); j++)
00149         //      featurevector[p++]=exp(pos->model_derivative_a(i, j, x)-posx);
00150 
00151         //  for (j=0; j<pos->get_M(); j++)
00152         //      featurevector[p++]=exp(pos->model_derivative_b(i, j, x)-posx);
00153         //}
00154     }
00155 
00156     //then do negative
00157     if (neglinear)
00158     {
00159         for (i=0; i<neg->get_N(); i++)
00160         {
00161             for (j=0; j<neg->get_M(); j++)
00162                 featurevector[p++]= - exp(neg->linear_model_derivative(i, j, x)-negx);
00163         }
00164     }
00165     else
00166     {
00167         for (idx=0; idx< neg_relevant_indizes.num_p; idx++)
00168             featurevector[p++]= - exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);
00169 
00170         for (idx=0; idx< neg_relevant_indizes.num_q; idx++)
00171             featurevector[p++]= - exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);
00172 
00173         for (idx=0; idx< neg_relevant_indizes.num_a; idx++)
00174                 featurevector[p++]= - exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);
00175 
00176         for (idx=0; idx< neg_relevant_indizes.num_b; idx++)
00177                 featurevector[p++]= - exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);
00178 
00179         //for (i=0; i<neg->get_N(); i++)
00180         //{
00181         //  featurevector[p++]= - exp(neg->model_derivative_p(i, x)-negx);
00182         //  featurevector[p++]= - exp(neg->model_derivative_q(i, x)-negx);
00183 
00184         //  for (j=0; j<neg->get_N(); j++)
00185         //      featurevector[p++]= - exp(neg->model_derivative_a(i, j, x)-negx);
00186 
00187         //  for (j=0; j<neg->get_M(); j++)
00188         //      featurevector[p++]= - exp(neg->model_derivative_b(i, j, x)-negx);
00189         //}
00190     }
00191 }
00192 
00193 float64_t* CTOPFeatures::set_feature_matrix()
00194 {
00195     int32_t len=0;
00196 
00197     num_features=get_num_features();
00198     ASSERT(num_features);
00199     ASSERT(pos);
00200     ASSERT(pos->get_observations());
00201 
00202     num_vectors=pos->get_observations()->get_num_vectors();
00203     SG_INFO( "allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0);
00204     SG_FREE(feature_matrix);
00205     feature_matrix=SG_MALLOC(float64_t, num_features*num_vectors);
00206     if (!feature_matrix)
00207     {
00208       SG_ERROR( "allocation not successful!");
00209         return NULL ;
00210     } ;
00211 
00212     SG_INFO( "calculating top feature matrix\n");
00213 
00214     for (int32_t x=0; x<num_vectors; x++)
00215     {
00216         if (!(x % (num_vectors/10+1)))
00217             SG_DEBUG( "%02d%%.", (int) (100.0*x/num_vectors));
00218         else if (!(x % (num_vectors/200+1)))
00219             SG_DEBUG( ".");
00220 
00221         compute_feature_vector(&feature_matrix[x*num_features], x, len);
00222     }
00223 
00224     SG_DONE();
00225 
00226     num_vectors=get_num_vectors() ;
00227     num_features=get_num_features() ;
00228 
00229     return feature_matrix;
00230 }
00231 
00232 bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
00233 {
00234     int32_t i=0;
00235     int32_t j=0;
00236 
00237     hmm_idx->num_p=0;
00238     hmm_idx->num_q=0;
00239     hmm_idx->num_a=0;
00240     hmm_idx->num_b=0;
00241 
00242     for (i=0; i<hmm->get_N(); i++)
00243     {
00244         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00245             hmm_idx->num_p++;
00246 
00247         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00248             hmm_idx->num_q++;
00249 
00250         for (j=0; j<hmm->get_N(); j++)
00251         {
00252             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00253                 hmm_idx->num_a++;
00254         }
00255 
00256         for (j=0; j<pos->get_M(); j++)
00257         {
00258             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00259                 hmm_idx->num_b++;
00260         }
00261     }
00262 
00263     if (hmm_idx->num_p > 0)
00264     {
00265         hmm_idx->idx_p=SG_MALLOC(int32_t, hmm_idx->num_p);
00266         ASSERT(hmm_idx->idx_p);
00267     }
00268 
00269     if (hmm_idx->num_q > 0)
00270     {
00271         hmm_idx->idx_q=SG_MALLOC(int32_t, hmm_idx->num_q);
00272         ASSERT(hmm_idx->idx_q);
00273     }
00274 
00275     if (hmm_idx->num_a > 0)
00276     {
00277         hmm_idx->idx_a_rows=SG_MALLOC(int32_t, hmm_idx->num_a);
00278         hmm_idx->idx_a_cols=SG_MALLOC(int32_t, hmm_idx->num_a);
00279         ASSERT(hmm_idx->idx_a_rows);
00280         ASSERT(hmm_idx->idx_a_cols);
00281     }
00282 
00283     if (hmm_idx->num_b > 0)
00284     {
00285         hmm_idx->idx_b_rows=SG_MALLOC(int32_t, hmm_idx->num_b);
00286         hmm_idx->idx_b_cols=SG_MALLOC(int32_t, hmm_idx->num_b);
00287         ASSERT(hmm_idx->idx_b_rows);
00288         ASSERT(hmm_idx->idx_b_cols);
00289     }
00290 
00291 
00292     int32_t idx_p=0;
00293     int32_t idx_q=0;
00294     int32_t idx_a=0;
00295     int32_t idx_b=0;
00296 
00297     for (i=0; i<hmm->get_N(); i++)
00298     {
00299         if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
00300         {
00301             ASSERT(idx_p < hmm_idx->num_p);
00302             hmm_idx->idx_p[idx_p++]=i;
00303         }
00304         
00305         if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
00306         {
00307             ASSERT(idx_q < hmm_idx->num_q);
00308             hmm_idx->idx_q[idx_q++]=i;
00309         }
00310 
00311         for (j=0; j<hmm->get_N(); j++)
00312         {
00313             if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
00314             {
00315                 ASSERT(idx_a < hmm_idx->num_a);
00316                 hmm_idx->idx_a_rows[idx_a]=i;
00317                 hmm_idx->idx_a_cols[idx_a++]=j;
00318             }
00319         }
00320 
00321         for (j=0; j<pos->get_M(); j++)
00322         {
00323             if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
00324             {
00325                 ASSERT(idx_b < hmm_idx->num_b);
00326                 hmm_idx->idx_b_rows[idx_b]=i;
00327                 hmm_idx->idx_b_cols[idx_b++]=j;
00328             }
00329         }
00330     }
00331 
00332     return true;
00333 }
00334 
00335 int32_t CTOPFeatures::compute_num_features()
00336 {
00337     int32_t num=0;
00338 
00339     if (pos && neg)
00340     {
00341         num+=1; //zeroth- component
00342 
00343         if (poslinear)
00344             num+=pos->get_N()*pos->get_M();
00345         else
00346         {
00347             num+= pos_relevant_indizes.num_p + pos_relevant_indizes.num_q + pos_relevant_indizes.num_a + pos_relevant_indizes.num_b;
00348         }
00349 
00350         if (neglinear)
00351             num+=neg->get_N()*neg->get_M();
00352         else
00353         {
00354             num+= neg_relevant_indizes.num_p + neg_relevant_indizes.num_q + neg_relevant_indizes.num_a + neg_relevant_indizes.num_b;
00355         }
00356 
00357         //num+=1; //zeroth- component
00358         //num+= (poslinear) ? (pos->get_N()*pos->get_M()) : (pos->get_N()*(1+pos->get_N()+1+pos->get_M()));
00359         //num+= (neglinear) ? (neg->get_N()*neg->get_M()) : (neg->get_N()*(1+neg->get_N()+1+neg->get_M()));
00360     }
00361     return num;
00362 }
00363 
00364 void CTOPFeatures::init()
00365 {
00366     pos = NULL;
00367     neg = NULL;
00368     neglinear = false;
00369     poslinear = false;
00370 
00371     memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
00372     memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));
00373 
00374     unset_generic();
00375     //TODO serialize HMMs
00376     //m_parameters->add((CSGObject**) &pos, "pos", "HMM for positive class.");
00377     //m_parameters->add((CSGObject**) &neg, "neg", "HMM for negative class.");
00378     m_parameters->add(&neglinear, "neglinear", "If negative HMM is a LinearHMM");
00379     m_parameters->add(&poslinear, "poslinear", "If positive HMM is a LinearHMM");
00380 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation