CombinedDotFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009-2010 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  * Copyright (C) 2010 Berlin Institute of Technology
00010  */
00011 
00012 #include <shogun/features/CombinedDotFeatures.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/mathematics/Math.h>
00015 
00016 using namespace shogun;
00017 
00018 CCombinedDotFeatures::CCombinedDotFeatures() : CDotFeatures()
00019 {
00020     init();
00021 
00022     feature_list=new CList(true);
00023     update_dim_feature_space_and_num_vec();
00024 }
00025 
00026 CCombinedDotFeatures::CCombinedDotFeatures(const CCombinedDotFeatures & orig)
00027 : CDotFeatures(orig), num_vectors(orig.num_vectors),
00028     num_dimensions(orig.num_dimensions)
00029 {
00030     init();
00031 
00032     feature_list=new CList(true);
00033 }
00034 
00035 CFeatures* CCombinedDotFeatures::duplicate() const
00036 {
00037     return new CCombinedDotFeatures(*this);
00038 }
00039 
00040 CCombinedDotFeatures::~CCombinedDotFeatures()
00041 {
00042     delete feature_list;
00043 }
00044 
00045 void CCombinedDotFeatures::list_feature_objs()
00046 {
00047     SG_INFO( "BEGIN COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions);
00048     this->list_feature_obj();
00049 
00050     CListElement* current = NULL ;
00051     CDotFeatures* f=get_first_feature_obj(current);
00052 
00053     while (f)
00054     {
00055         f->list_feature_obj();
00056         f=get_next_feature_obj(current);
00057     }
00058 
00059     SG_INFO( "END COMBINED DOTFEATURES LIST (%d, %d) - ", num_vectors, num_dimensions);
00060     this->list_feature_obj();
00061 }
00062 
00063 void CCombinedDotFeatures::update_dim_feature_space_and_num_vec()
00064 {
00065     CListElement* current = NULL ;
00066     CDotFeatures* f=get_first_feature_obj(current);
00067 
00068     int32_t dim=0;
00069     int32_t vec=-1;
00070 
00071     while (f)
00072     {
00073         dim+= f->get_dim_feature_space();
00074         if (vec==-1)
00075             vec=f->get_num_vectors();
00076         else if (vec != f->get_num_vectors())
00077         {
00078             f->list_feature_obj();
00079             SG_ERROR("Number of vectors (%d) mismatches in above feature obj (%d)\n", vec, f->get_num_vectors());
00080         }
00081 
00082         SG_UNREF(f);
00083 
00084         f=get_next_feature_obj(current);
00085     }
00086 
00087     num_dimensions=dim;
00088     num_vectors=vec;
00089     SG_DEBUG("vecs=%d, dims=%d\n", num_vectors, num_dimensions);
00090 }
00091 
00092 float64_t CCombinedDotFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00093 {
00094     float64_t result=0;
00095 
00096     ASSERT(df);
00097     ASSERT(df->get_feature_type() == get_feature_type());
00098     ASSERT(df->get_feature_class() == get_feature_class());
00099     CCombinedDotFeatures* cf = (CCombinedDotFeatures*) df;
00100 
00101     CListElement* current1 = NULL;
00102     CDotFeatures* f1=get_first_feature_obj(current1);
00103 
00104     CListElement* current2 = NULL;
00105     CDotFeatures* f2=cf->get_first_feature_obj(current2);
00106 
00107     while (f1 && f2)
00108     {
00109         result += f1->dot(vec_idx1, f2,vec_idx2) *
00110             f1->get_combined_feature_weight() *
00111             f2->get_combined_feature_weight();
00112 
00113         SG_UNREF(f1);
00114         SG_UNREF(f2);
00115         f1=get_next_feature_obj(current1);
00116         f2=cf->get_next_feature_obj(current2);
00117     }
00118 
00119     // check that both have same number of feature objects inside
00120     ASSERT(f1 == NULL && f2 == NULL);
00121 
00122     return result;
00123 }
00124 
00125 float64_t CCombinedDotFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00126 {
00127     float64_t result=0;
00128 
00129     CListElement* current = NULL ;
00130     CDotFeatures* f=get_first_feature_obj(current);
00131     uint32_t offs=0;
00132 
00133     while (f)
00134     {
00135         int32_t dim = f->get_dim_feature_space();
00136         result += f->dense_dot(vec_idx1, vec2+offs, dim)*f->get_combined_feature_weight();
00137         offs += dim;
00138 
00139         SG_UNREF(f);
00140         f=get_next_feature_obj(current);
00141     }
00142 
00143     return result;
00144 }
00145 
00146 void CCombinedDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00147 {
00148     if (stop<=start)
00149         return;
00150     ASSERT(dim==num_dimensions);
00151 
00152     CListElement* current = NULL;
00153     CDotFeatures* f=get_first_feature_obj(current);
00154     uint32_t offs=0;
00155     bool first=true;
00156     int32_t num=stop-start;
00157     float64_t* tmp=SG_MALLOC(float64_t, num);
00158 
00159     while (f)
00160     {
00161         int32_t f_dim = f->get_dim_feature_space();
00162         if (first)
00163         {
00164             f->dense_dot_range(output, start, stop, alphas, vec+offs, f_dim, b);
00165             first=false;
00166         }
00167         else
00168         {
00169             f->dense_dot_range(tmp, start, stop, alphas, vec+offs, f_dim, b);
00170             for (int32_t i=0; i<num; i++)
00171                 output[i]+=tmp[i];
00172         }
00173         offs += f_dim;
00174 
00175         SG_UNREF(f);
00176         f=get_next_feature_obj(current);
00177     }
00178     SG_FREE(tmp);
00179 }
00180 
00181 void CCombinedDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00182 {
00183     if (num<=0)
00184         return;
00185     ASSERT(dim==num_dimensions);
00186 
00187     CListElement* current = NULL;
00188     CDotFeatures* f=get_first_feature_obj(current);
00189     uint32_t offs=0;
00190     bool first=true;
00191     float64_t* tmp=SG_MALLOC(float64_t, num);
00192 
00193     while (f)
00194     {
00195         int32_t f_dim = f->get_dim_feature_space();
00196         if (first)
00197         {
00198             f->dense_dot_range_subset(sub_index, num, output, alphas, vec+offs, f_dim, b);
00199             first=false;
00200         }
00201         else
00202         {
00203             f->dense_dot_range_subset(sub_index, num, tmp, alphas, vec+offs, f_dim, b);
00204             for (int32_t i=0; i<num; i++)
00205                 output[i]+=tmp[i];
00206         }
00207         offs += f_dim;
00208 
00209         SG_UNREF(f);
00210         f=get_next_feature_obj(current);
00211     }
00212     SG_FREE(tmp);
00213 }
00214 
00215 void CCombinedDotFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00216 {
00217     CListElement* current = NULL ;
00218     CDotFeatures* f=get_first_feature_obj(current);
00219     uint32_t offs=0;
00220 
00221     while (f)
00222     {
00223         int32_t dim = f->get_dim_feature_space();
00224         f->add_to_dense_vec(alpha*f->get_combined_feature_weight(), vec_idx1, vec2+offs, dim, abs_val);
00225         offs += dim;
00226 
00227         SG_UNREF(f);
00228         f=get_next_feature_obj(current);
00229     }
00230 }
00231 
00232 void* CCombinedDotFeatures::get_feature_iterator(int32_t vector_index)
00233 {
00234     combined_feature_iterator* it=SG_MALLOC(combined_feature_iterator, 1);
00235 
00236     it->current=NULL;
00237     it->f=get_first_feature_obj(it->current);
00238     it->iterator=it->f->get_feature_iterator(vector_index);
00239     it->vector_index=vector_index;
00240     return it;
00241 }
00242 
00243 bool CCombinedDotFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00244 {
00245     ASSERT(iterator);
00246     combined_feature_iterator* it = (combined_feature_iterator*) iterator;
00247 
00248     while (it->f)
00249     {
00250         if (it->f->get_next_feature(index, value, it->iterator))
00251         {
00252             value*=get_combined_feature_weight();
00253             return true;
00254         }
00255 
00256         it->f->free_feature_iterator(it->iterator);
00257         it->f=get_next_feature_obj(it->current);
00258         if (it->f)
00259             it->iterator=it->f->get_feature_iterator(it->vector_index);
00260         else
00261             it->iterator=NULL;
00262     }
00263     return false;
00264 }
00265 
00266 void CCombinedDotFeatures::free_feature_iterator(void* iterator)
00267 {
00268     if (iterator)
00269     {
00270         combined_feature_iterator* it = (combined_feature_iterator*) iterator;
00271         if (it->iterator && it->f)
00272             it->f->free_feature_iterator(it->iterator);
00273         SG_FREE(it);
00274     }
00275 }
00276 
00277 CDotFeatures* CCombinedDotFeatures::get_first_feature_obj()
00278 {
00279     return (CDotFeatures*) feature_list->get_first_element();
00280 }
00281 
00282 CDotFeatures* CCombinedDotFeatures::get_first_feature_obj(CListElement*& current)
00283 {
00284     return (CDotFeatures*) feature_list->get_first_element(current);
00285 }
00286 
00287 CDotFeatures* CCombinedDotFeatures::get_next_feature_obj()
00288 {
00289     return (CDotFeatures*) feature_list->get_next_element();
00290 }
00291 
00292 CDotFeatures* CCombinedDotFeatures::get_next_feature_obj(CListElement*& current)
00293 {
00294     return (CDotFeatures*) feature_list->get_next_element(current);
00295 }
00296 
00297 CDotFeatures* CCombinedDotFeatures::get_last_feature_obj()
00298 {
00299     return (CDotFeatures*) feature_list->get_last_element();
00300 }
00301 
00302 bool CCombinedDotFeatures::insert_feature_obj(CDotFeatures* obj)
00303 {
00304     ASSERT(obj);
00305     bool result=feature_list->insert_element(obj);
00306     update_dim_feature_space_and_num_vec();
00307     return result;
00308 }
00309 
00310 bool CCombinedDotFeatures::append_feature_obj(CDotFeatures* obj)
00311 {
00312     ASSERT(obj);
00313     bool result=feature_list->append_element(obj);
00314     update_dim_feature_space_and_num_vec();
00315     return result;
00316 }
00317 
00318 bool CCombinedDotFeatures::delete_feature_obj()
00319 {
00320     CDotFeatures* f=(CDotFeatures*) feature_list->delete_element();
00321     if (f)
00322     {
00323         SG_UNREF(f);
00324         update_dim_feature_space_and_num_vec();
00325         return true;
00326     }
00327     else
00328         return false;
00329 }
00330 
00331 int32_t CCombinedDotFeatures::get_num_feature_obj()
00332 {
00333     return feature_list->get_num_elements();
00334 }
00335 
00336 int32_t CCombinedDotFeatures::get_nnz_features_for_vector(int32_t num)
00337 {
00338     CListElement* current = NULL ;
00339     CDotFeatures* f=get_first_feature_obj(current);
00340     int32_t result=0;
00341 
00342     while (f)
00343     {
00344         result+=f->get_nnz_features_for_vector(num);
00345 
00346         SG_UNREF(f);
00347         f=get_next_feature_obj(current);
00348     }
00349 
00350     return result;
00351 }
00352 
00353 SGVector<float64_t> CCombinedDotFeatures::get_subfeature_weights()
00354 {
00355     int32_t num_weights = get_num_feature_obj();
00356     ASSERT(num_weights > 0);
00357 
00358     float64_t* weights=SG_MALLOC(float64_t, num_weights);
00359 
00360     CListElement* current = NULL;
00361     CDotFeatures* f = get_first_feature_obj(current);
00362 
00363     int32_t i = 0;
00364     while (f)
00365     {
00366         weights[i] = f->get_combined_feature_weight();
00367 
00368         SG_UNREF(f);
00369         f = get_next_feature_obj(current);
00370         i++;
00371     }
00372     return SGVector<float64_t>(weights,num_weights);
00373 }
00374 
00375 void CCombinedDotFeatures::set_subfeature_weights(SGVector<float64_t> weights)
00376 {
00377     int32_t i = 0;
00378     CListElement* current = NULL ;
00379     CDotFeatures* f = get_first_feature_obj(current);
00380 
00381     ASSERT(weights.vlen==get_num_feature_obj());
00382 
00383     while(f)
00384     {
00385         f->set_combined_feature_weight(weights[i]);
00386 
00387         SG_UNREF(f);
00388         f = get_next_feature_obj(current);
00389         i++;
00390     }
00391 }
00392 
00393 void CCombinedDotFeatures::init()
00394 {
00395     m_parameters->add(&num_dimensions, "num_dimensions",
00396                       "Total number of dimensions.");
00397     m_parameters->add(&num_vectors, "num_vectors",
00398                       "Total number of vectors.");
00399     m_parameters->add((CSGObject**) &feature_list,
00400                       "feature_list", "Feature list.");
00401 }
00402 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation