SHOGUN: DotFeatures.cpp Source File

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/features/DotFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/base/Parallel.h>
00015 #include <shogun/base/Parameter.h>
00016 
00017 #ifdef HAVE_PTHREAD
00018 #include <pthread.h>
00019 #endif
00020 
00021 using namespace shogun;
00022 
00023 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00024 struct DF_THREAD_PARAM
00025 {
00026     CDotFeatures* df;
00027     int32_t* sub_index;
00028     float64_t* output;
00029     int32_t start;
00030     int32_t stop;
00031     float64_t* alphas;
00032     float64_t* vec;
00033     int32_t dim;
00034     float64_t bias;
00035     bool progress;
00036 };
00037 #endif // DOXYGEN_SHOULD_SKIP_THIS
00038 
00039 
00040 CDotFeatures::CDotFeatures(int32_t size)
00041     :CFeatures(size), combined_weight(1.0)
00042 {
00043     init();
00044     set_property(FP_DOT);
00045 }
00046 
00047 
00048 CDotFeatures::CDotFeatures(const CDotFeatures & orig)
00049     :CFeatures(orig), combined_weight(orig.combined_weight)
00050 {
00051     init();
00052 }
00053 
00054 
00055 CDotFeatures::CDotFeatures(CFile* loader)
00056     :CFeatures(loader)
00057 {
00058     init();
00059 }
00060 
00061 void
00062 CDotFeatures::init(void)
00063 {
00064     m_parameters->add(&combined_weight, "combined_weight",
00065                       "Feature weighting in combined dot features.");
00066 }
00067 
00068 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00069 {
00070     ASSERT(output);
00071     // write access is internally between output[start..stop] so the following
00072     // line is necessary to write to output[0...(stop-start-1)]
00073     output-=start; 
00074     ASSERT(start>=0);
00075     ASSERT(start<stop);
00076     ASSERT(stop<=get_num_vectors());
00077 
00078     int32_t num_vectors=stop-start;
00079     ASSERT(num_vectors>0);
00080 
00081     int32_t num_threads=parallel->get_num_threads();
00082     ASSERT(num_threads>0);
00083 
00084     CSignal::clear_cancel();
00085 
00086 #ifdef HAVE_PTHREAD
00087     if (num_threads < 2)
00088     {
00089 #endif
00090         DF_THREAD_PARAM params;
00091         params.df=this;
00092         params.sub_index=NULL;
00093         params.output=output;
00094         params.start=start;
00095         params.stop=stop;
00096         params.alphas=alphas;
00097         params.vec=vec;
00098         params.dim=dim;
00099         params.bias=b;
00100         params.progress=false; //true;
00101         dense_dot_range_helper((void*) &params);
00102 #ifdef HAVE_PTHREAD
00103     }
00104     else
00105     {
00106         pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00107         DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00108         int32_t step= num_vectors/num_threads;
00109 
00110         int32_t t;
00111 
00112         for (t=0; t<num_threads-1; t++)
00113         {
00114             params[t].df = this;
00115             params[t].sub_index=NULL;
00116             params[t].output = output;
00117             params[t].start = start+t*step;
00118             params[t].stop = start+(t+1)*step;
00119             params[t].alphas=alphas;
00120             params[t].vec=vec;
00121             params[t].dim=dim;
00122             params[t].bias=b;
00123             params[t].progress = false;
00124             pthread_create(&threads[t], NULL,
00125                     CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
00126         }
00127 
00128         params[t].df = this;
00129         params[t].output = output;
00130         params[t].sub_index=NULL;
00131         params[t].start = start+t*step;
00132         params[t].stop = stop;
00133         params[t].alphas=alphas;
00134         params[t].vec=vec;
00135         params[t].dim=dim;
00136         params[t].bias=b;
00137         params[t].progress = false; //true;
00138         dense_dot_range_helper((void*) &params[t]);
00139 
00140         for (t=0; t<num_threads-1; t++)
00141             pthread_join(threads[t], NULL);
00142 
00143         SG_FREE(params);
00144         SG_FREE(threads);
00145     }
00146 #endif
00147 
00148 #ifndef WIN32
00149         if ( CSignal::cancel_computations() )
00150             SG_INFO( "prematurely stopped.           \n");
00151 #endif
00152 }
00153 
00154 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00155 {
00156     ASSERT(sub_index);
00157     ASSERT(output);
00158 
00159     int32_t num_threads=parallel->get_num_threads();
00160     ASSERT(num_threads>0);
00161 
00162     CSignal::clear_cancel();
00163 
00164 #ifdef HAVE_PTHREAD
00165     if (num_threads < 2)
00166     {
00167 #endif
00168         DF_THREAD_PARAM params;
00169         params.df=this;
00170         params.sub_index=sub_index;
00171         params.output=output;
00172         params.start=0;
00173         params.stop=num;
00174         params.alphas=alphas;
00175         params.vec=vec;
00176         params.dim=dim;
00177         params.bias=b;
00178         params.progress=false; //true;
00179         dense_dot_range_helper((void*) &params);
00180 #ifdef HAVE_PTHREAD
00181     }
00182     else
00183     {
00184         pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00185         DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00186         int32_t step= num/num_threads;
00187 
00188         int32_t t;
00189 
00190         for (t=0; t<num_threads-1; t++)
00191         {
00192             params[t].df = this;
00193             params[t].sub_index=sub_index;
00194             params[t].output = output;
00195             params[t].start = t*step;
00196             params[t].stop = (t+1)*step;
00197             params[t].alphas=alphas;
00198             params[t].vec=vec;
00199             params[t].dim=dim;
00200             params[t].bias=b;
00201             params[t].progress = false;
00202             pthread_create(&threads[t], NULL,
00203                     CDotFeatures::dense_dot_range_helper, (void*)&params[t]);
00204         }
00205 
00206         params[t].df = this;
00207         params[t].sub_index=sub_index;
00208         params[t].output = output;
00209         params[t].start = t*step;
00210         params[t].stop = num;
00211         params[t].alphas=alphas;
00212         params[t].vec=vec;
00213         params[t].dim=dim;
00214         params[t].bias=b;
00215         params[t].progress = false; //true;
00216         dense_dot_range_helper((void*) &params[t]);
00217 
00218         for (t=0; t<num_threads-1; t++)
00219             pthread_join(threads[t], NULL);
00220 
00221         SG_FREE(params);
00222         SG_FREE(threads);
00223     }
00224 #endif
00225 
00226 #ifndef WIN32
00227         if ( CSignal::cancel_computations() )
00228             SG_INFO( "prematurely stopped.           \n");
00229 #endif
00230 }
00231 
00232 void* CDotFeatures::dense_dot_range_helper(void* p)
00233 {
00234     DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
00235     CDotFeatures* df=par->df;
00236     int32_t* sub_index=par->sub_index;
00237     float64_t* output=par->output;
00238     int32_t start=par->start;
00239     int32_t stop=par->stop;
00240     float64_t* alphas=par->alphas;
00241     float64_t* vec=par->vec;
00242     int32_t dim=par->dim;
00243     float64_t bias=par->bias;
00244     bool progress=par->progress;
00245 
00246     if (sub_index)
00247     {
00248 #ifdef WIN32
00249         for (int32_t i=start; i<stop i++)
00250 #else
00251         for (int32_t i=start; i<stop &&
00252                 !CSignal::cancel_computations(); i++)
00253 #endif
00254         {
00255             if (alphas)
00256                 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
00257             else
00258                 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
00259             if (progress)
00260                 df->display_progress(start, stop, i);
00261         }
00262 
00263     }
00264     else
00265     {
00266 #ifdef WIN32
00267         for (int32_t i=start; i<stop i++)
00268 #else
00269         for (int32_t i=start; i<stop &&
00270                 !CSignal::cancel_computations(); i++)
00271 #endif
00272         {
00273             if (alphas)
00274                 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
00275             else
00276                 output[i]=df->dense_dot(i, vec, dim)+bias;
00277             if (progress)
00278                 df->display_progress(start, stop, i);
00279         }
00280     }
00281 
00282     return NULL;
00283 }
00284 
00285 SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix()
00286 {
00287     SGMatrix<float64_t> m;
00288     
00289     int64_t offs=0;
00290     int32_t num=get_num_vectors();
00291     int32_t dim=get_dim_feature_space();
00292     ASSERT(num>0);
00293     ASSERT(dim>0);
00294 
00295     int64_t sz=((uint64_t) num)* dim;
00296 
00297     m.do_free=true;
00298     m.num_cols=dim;
00299     m.num_rows=num;
00300     m.matrix=SG_MALLOC(float64_t, sz);
00301     memset(m.matrix, 0, sz*sizeof(float64_t));
00302 
00303     for (int32_t i=0; i<num; i++)
00304     {
00305         add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
00306         offs+=dim;
00307     }
00308 
00309     return m;
00310 }
00311 
00312 SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num)
00313 {
00314     SGVector<float64_t> v;
00315 
00316     int32_t dim=get_dim_feature_space();
00317     ASSERT(num>=0 && num<=num);
00318     ASSERT(dim>0);
00319 
00320     v.do_free=true;
00321     v.vlen=dim;
00322     v.vector=SG_MALLOC(float64_t, dim);
00323     memset(v.vector, 0, dim*sizeof(float64_t));
00324 
00325     add_to_dense_vec(1.0, num, v.vector, dim);
00326     return v;
00327 }
00328 
00329 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
00330 {
00331     int32_t num=get_num_vectors();
00332     int32_t d=get_dim_feature_space();
00333     float64_t* w= SG_MALLOC(float64_t, d);
00334     CMath::fill_vector(w, d, 0.0);
00335 
00336     CTime t;
00337     float64_t start_cpu=t.get_runtime();
00338     float64_t start_wall=t.get_curtime();
00339     for (int32_t r=0; r<repeats; r++)
00340     {
00341         for (int32_t i=0; i<num; i++)
00342             add_to_dense_vec(1.172343*(r+1), i, w, d);
00343     }
00344 
00345     SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
00346             repeats, num, (t.get_runtime()-start_cpu)/repeats,
00347             (t.get_curtime()-start_wall)/repeats);
00348 
00349     SG_FREE(w);
00350 }
00351 
00352 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
00353 {
00354     int32_t num=get_num_vectors();
00355     int32_t d=get_dim_feature_space();
00356     float64_t* w= SG_MALLOC(float64_t, d);
00357     float64_t* out= SG_MALLOC(float64_t, num);
00358     float64_t* alphas= SG_MALLOC(float64_t, num);
00359     CMath::range_fill_vector(w, d, 17.0);
00360     CMath::range_fill_vector(alphas, num, 1.2345);
00361     //CMath::fill_vector(w, d, 17.0);
00362     //CMath::fill_vector(alphas, num, 1.2345);
00363 
00364     CTime t;
00365     float64_t start_cpu=t.get_runtime();
00366     float64_t start_wall=t.get_curtime();
00367 
00368     for (int32_t r=0; r<repeats; r++)
00369             dense_dot_range(out, 0, num, alphas, w, d, 23);
00370 
00371 #ifdef DEBUG_DOTFEATURES
00372     CMath::display_vector(out, 40, "dense_dot_range");
00373     float64_t* out2= SG_MALLOC(float64_t, num);
00374 
00375     for (int32_t r=0; r<repeats; r++)
00376     {
00377         CMath::fill_vector(out2, num, 0.0);
00378         for (int32_t i=0; i<num; i++)
00379             out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
00380     }
00381     CMath::display_vector(out2, 40, "dense_dot");
00382     for (int32_t i=0; i<num; i++)
00383         out2[i]-=out[i];
00384     CMath::display_vector(out2, 40, "diff");
00385 #endif
00386     SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
00387             repeats, num, (t.get_runtime()-start_cpu)/repeats,
00388             (t.get_curtime()-start_wall)/repeats);
00389 
00390     SG_FREE(alphas);
00391     SG_FREE(out);
00392     SG_FREE(w);
00393 }
00394 
00395 SGVector<float64_t> CDotFeatures::get_mean()
00396 {
00397     int32_t num=get_num_vectors();
00398     int32_t dim=get_dim_feature_space();
00399     ASSERT(num>0);
00400     ASSERT(dim>0);
00401 
00402     SGVector<float64_t> mean(dim);
00403     memset(mean.vector, 0, sizeof(float64_t)*dim);
00404 
00405     for (int i = 0; i < num; i++)
00406         add_to_dense_vec(1, i, mean.vector, dim);
00407     for (int j = 0; j < dim; j++)
00408         mean.vector[j] /= num;
00409 
00410     return mean;
00411 }                                   
00412 
00413 SGMatrix<float64_t> CDotFeatures::get_cov()
00414 {
00415     int32_t num=get_num_vectors();
00416     int32_t dim=get_dim_feature_space();
00417     ASSERT(num>0);
00418     ASSERT(dim>0);
00419 
00420     SGMatrix<float64_t> cov(dim, dim);
00421 
00422     memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
00423 
00424     SGVector<float64_t> mean = get_mean();
00425 
00426     for (int i = 0; i < num; i++)
00427     {
00428         SGVector<float64_t> v = get_computed_dot_feature_vector(i);
00429         CMath::add<float64_t>(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
00430         for (int m = 0; m < v.vlen; m++)
00431         {
00432             for (int n = 0; n <= m ; n++)
00433             {
00434                 (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
00435             }
00436         }
00437         v.free_vector();
00438     }
00439     for (int m = 0; m < dim; m++)
00440     {
00441         for (int n = 0; n <= m ; n++)
00442         {
00443             (cov.matrix)[m*dim+n] /= num;
00444         }
00445     }
00446     for (int m = 0; m < dim-1; m++)
00447     {
00448         for (int n = m+1; n < dim; n++)
00449         {
00450             (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
00451         }
00452     }
00453     mean.destroy_vector();
00454     return cov;
00455 }