00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/features/DotFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/base/Parallel.h>
00015 #include <shogun/base/Parameter.h>
00016
00017 #ifdef HAVE_PTHREAD
00018 #include <pthread.h>
00019 #endif
00020
00021 using namespace shogun;
00022
00023 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00024 struct DF_THREAD_PARAM
00025 {
00026 CDotFeatures* df;
00027 int32_t* sub_index;
00028 float64_t* output;
00029 int32_t start;
00030 int32_t stop;
00031 float64_t* alphas;
00032 float64_t* vec;
00033 int32_t dim;
00034 float64_t bias;
00035 bool progress;
00036 };
00037 #endif // DOXYGEN_SHOULD_SKIP_THIS
00038
00039
00040 CDotFeatures::CDotFeatures(int32_t size)
00041 :CFeatures(size), combined_weight(1.0)
00042 {
00043 init();
00044 set_property(FP_DOT);
00045 }
00046
00047
00048 CDotFeatures::CDotFeatures(const CDotFeatures & orig)
00049 :CFeatures(orig), combined_weight(orig.combined_weight)
00050 {
00051 init();
00052 }
00053
00054
00055 CDotFeatures::CDotFeatures(CFile* loader)
00056 :CFeatures(loader)
00057 {
00058 init();
00059 }
00060
00061 void
00062 CDotFeatures::init(void)
00063 {
00064 m_parameters->add(&combined_weight, "combined_weight",
00065 "Feature weighting in combined dot features.");
00066 }
00067
00068 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00069 {
00070 ASSERT(output);
00071
00072
00073 output-=start;
00074 ASSERT(start>=0);
00075 ASSERT(start<stop);
00076 ASSERT(stop<=get_num_vectors());
00077
00078 int32_t num_vectors=stop-start;
00079 ASSERT(num_vectors>0);
00080
00081 int32_t num_threads=parallel->get_num_threads();
00082 ASSERT(num_threads>0);
00083
00084 CSignal::clear_cancel();
00085
00086 #ifdef HAVE_PTHREAD
00087 if (num_threads < 2)
00088 {
00089 #endif
00090 DF_THREAD_PARAM params;
00091 params.df=this;
00092 params.sub_index=NULL;
00093 params.output=output;
00094 params.start=start;
00095 params.stop=stop;
00096 params.alphas=alphas;
00097 params.vec=vec;
00098 params.dim=dim;
00099 params.bias=b;
00100 params.progress=false;
00101 dense_dot_range_helper((void*) ¶ms);
00102 #ifdef HAVE_PTHREAD
00103 }
00104 else
00105 {
00106 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00107 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00108 int32_t step= num_vectors/num_threads;
00109
00110 int32_t t;
00111
00112 for (t=0; t<num_threads-1; t++)
00113 {
00114 params[t].df = this;
00115 params[t].sub_index=NULL;
00116 params[t].output = output;
00117 params[t].start = start+t*step;
00118 params[t].stop = start+(t+1)*step;
00119 params[t].alphas=alphas;
00120 params[t].vec=vec;
00121 params[t].dim=dim;
00122 params[t].bias=b;
00123 params[t].progress = false;
00124 pthread_create(&threads[t], NULL,
00125 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00126 }
00127
00128 params[t].df = this;
00129 params[t].output = output;
00130 params[t].sub_index=NULL;
00131 params[t].start = start+t*step;
00132 params[t].stop = stop;
00133 params[t].alphas=alphas;
00134 params[t].vec=vec;
00135 params[t].dim=dim;
00136 params[t].bias=b;
00137 params[t].progress = false;
00138 dense_dot_range_helper((void*) ¶ms[t]);
00139
00140 for (t=0; t<num_threads-1; t++)
00141 pthread_join(threads[t], NULL);
00142
00143 SG_FREE(params);
00144 SG_FREE(threads);
00145 }
00146 #endif
00147
00148 #ifndef WIN32
00149 if ( CSignal::cancel_computations() )
00150 SG_INFO( "prematurely stopped. \n");
00151 #endif
00152 }
00153
00154 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00155 {
00156 ASSERT(sub_index);
00157 ASSERT(output);
00158
00159 int32_t num_threads=parallel->get_num_threads();
00160 ASSERT(num_threads>0);
00161
00162 CSignal::clear_cancel();
00163
00164 #ifdef HAVE_PTHREAD
00165 if (num_threads < 2)
00166 {
00167 #endif
00168 DF_THREAD_PARAM params;
00169 params.df=this;
00170 params.sub_index=sub_index;
00171 params.output=output;
00172 params.start=0;
00173 params.stop=num;
00174 params.alphas=alphas;
00175 params.vec=vec;
00176 params.dim=dim;
00177 params.bias=b;
00178 params.progress=false;
00179 dense_dot_range_helper((void*) ¶ms);
00180 #ifdef HAVE_PTHREAD
00181 }
00182 else
00183 {
00184 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00185 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00186 int32_t step= num/num_threads;
00187
00188 int32_t t;
00189
00190 for (t=0; t<num_threads-1; t++)
00191 {
00192 params[t].df = this;
00193 params[t].sub_index=sub_index;
00194 params[t].output = output;
00195 params[t].start = t*step;
00196 params[t].stop = (t+1)*step;
00197 params[t].alphas=alphas;
00198 params[t].vec=vec;
00199 params[t].dim=dim;
00200 params[t].bias=b;
00201 params[t].progress = false;
00202 pthread_create(&threads[t], NULL,
00203 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00204 }
00205
00206 params[t].df = this;
00207 params[t].sub_index=sub_index;
00208 params[t].output = output;
00209 params[t].start = t*step;
00210 params[t].stop = num;
00211 params[t].alphas=alphas;
00212 params[t].vec=vec;
00213 params[t].dim=dim;
00214 params[t].bias=b;
00215 params[t].progress = false;
00216 dense_dot_range_helper((void*) ¶ms[t]);
00217
00218 for (t=0; t<num_threads-1; t++)
00219 pthread_join(threads[t], NULL);
00220
00221 SG_FREE(params);
00222 SG_FREE(threads);
00223 }
00224 #endif
00225
00226 #ifndef WIN32
00227 if ( CSignal::cancel_computations() )
00228 SG_INFO( "prematurely stopped. \n");
00229 #endif
00230 }
00231
00232 void* CDotFeatures::dense_dot_range_helper(void* p)
00233 {
00234 DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
00235 CDotFeatures* df=par->df;
00236 int32_t* sub_index=par->sub_index;
00237 float64_t* output=par->output;
00238 int32_t start=par->start;
00239 int32_t stop=par->stop;
00240 float64_t* alphas=par->alphas;
00241 float64_t* vec=par->vec;
00242 int32_t dim=par->dim;
00243 float64_t bias=par->bias;
00244 bool progress=par->progress;
00245
00246 if (sub_index)
00247 {
00248 #ifdef WIN32
00249 for (int32_t i=start; i<stop i++)
00250 #else
00251 for (int32_t i=start; i<stop &&
00252 !CSignal::cancel_computations(); i++)
00253 #endif
00254 {
00255 if (alphas)
00256 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
00257 else
00258 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
00259 if (progress)
00260 df->display_progress(start, stop, i);
00261 }
00262
00263 }
00264 else
00265 {
00266 #ifdef WIN32
00267 for (int32_t i=start; i<stop i++)
00268 #else
00269 for (int32_t i=start; i<stop &&
00270 !CSignal::cancel_computations(); i++)
00271 #endif
00272 {
00273 if (alphas)
00274 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
00275 else
00276 output[i]=df->dense_dot(i, vec, dim)+bias;
00277 if (progress)
00278 df->display_progress(start, stop, i);
00279 }
00280 }
00281
00282 return NULL;
00283 }
00284
00285 SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix()
00286 {
00287 SGMatrix<float64_t> m;
00288
00289 int64_t offs=0;
00290 int32_t num=get_num_vectors();
00291 int32_t dim=get_dim_feature_space();
00292 ASSERT(num>0);
00293 ASSERT(dim>0);
00294
00295 int64_t sz=((uint64_t) num)* dim;
00296
00297 m.do_free=true;
00298 m.num_cols=dim;
00299 m.num_rows=num;
00300 m.matrix=SG_MALLOC(float64_t, sz);
00301 memset(m.matrix, 0, sz*sizeof(float64_t));
00302
00303 for (int32_t i=0; i<num; i++)
00304 {
00305 add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
00306 offs+=dim;
00307 }
00308
00309 return m;
00310 }
00311
00312 SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num)
00313 {
00314 SGVector<float64_t> v;
00315
00316 int32_t dim=get_dim_feature_space();
00317 ASSERT(num>=0 && num<=num);
00318 ASSERT(dim>0);
00319
00320 v.do_free=true;
00321 v.vlen=dim;
00322 v.vector=SG_MALLOC(float64_t, dim);
00323 memset(v.vector, 0, dim*sizeof(float64_t));
00324
00325 add_to_dense_vec(1.0, num, v.vector, dim);
00326 return v;
00327 }
00328
00329 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
00330 {
00331 int32_t num=get_num_vectors();
00332 int32_t d=get_dim_feature_space();
00333 float64_t* w= SG_MALLOC(float64_t, d);
00334 CMath::fill_vector(w, d, 0.0);
00335
00336 CTime t;
00337 float64_t start_cpu=t.get_runtime();
00338 float64_t start_wall=t.get_curtime();
00339 for (int32_t r=0; r<repeats; r++)
00340 {
00341 for (int32_t i=0; i<num; i++)
00342 add_to_dense_vec(1.172343*(r+1), i, w, d);
00343 }
00344
00345 SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
00346 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00347 (t.get_curtime()-start_wall)/repeats);
00348
00349 SG_FREE(w);
00350 }
00351
00352 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
00353 {
00354 int32_t num=get_num_vectors();
00355 int32_t d=get_dim_feature_space();
00356 float64_t* w= SG_MALLOC(float64_t, d);
00357 float64_t* out= SG_MALLOC(float64_t, num);
00358 float64_t* alphas= SG_MALLOC(float64_t, num);
00359 CMath::range_fill_vector(w, d, 17.0);
00360 CMath::range_fill_vector(alphas, num, 1.2345);
00361
00362
00363
00364 CTime t;
00365 float64_t start_cpu=t.get_runtime();
00366 float64_t start_wall=t.get_curtime();
00367
00368 for (int32_t r=0; r<repeats; r++)
00369 dense_dot_range(out, 0, num, alphas, w, d, 23);
00370
00371 #ifdef DEBUG_DOTFEATURES
00372 CMath::display_vector(out, 40, "dense_dot_range");
00373 float64_t* out2= SG_MALLOC(float64_t, num);
00374
00375 for (int32_t r=0; r<repeats; r++)
00376 {
00377 CMath::fill_vector(out2, num, 0.0);
00378 for (int32_t i=0; i<num; i++)
00379 out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
00380 }
00381 CMath::display_vector(out2, 40, "dense_dot");
00382 for (int32_t i=0; i<num; i++)
00383 out2[i]-=out[i];
00384 CMath::display_vector(out2, 40, "diff");
00385 #endif
00386 SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
00387 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00388 (t.get_curtime()-start_wall)/repeats);
00389
00390 SG_FREE(alphas);
00391 SG_FREE(out);
00392 SG_FREE(w);
00393 }
00394
00395 SGVector<float64_t> CDotFeatures::get_mean()
00396 {
00397 int32_t num=get_num_vectors();
00398 int32_t dim=get_dim_feature_space();
00399 ASSERT(num>0);
00400 ASSERT(dim>0);
00401
00402 SGVector<float64_t> mean(dim);
00403 memset(mean.vector, 0, sizeof(float64_t)*dim);
00404
00405 for (int i = 0; i < num; i++)
00406 add_to_dense_vec(1, i, mean.vector, dim);
00407 for (int j = 0; j < dim; j++)
00408 mean.vector[j] /= num;
00409
00410 return mean;
00411 }
00412
00413 SGMatrix<float64_t> CDotFeatures::get_cov()
00414 {
00415 int32_t num=get_num_vectors();
00416 int32_t dim=get_dim_feature_space();
00417 ASSERT(num>0);
00418 ASSERT(dim>0);
00419
00420 SGMatrix<float64_t> cov(dim, dim);
00421
00422 memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
00423
00424 SGVector<float64_t> mean = get_mean();
00425
00426 for (int i = 0; i < num; i++)
00427 {
00428 SGVector<float64_t> v = get_computed_dot_feature_vector(i);
00429 CMath::add<float64_t>(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
00430 for (int m = 0; m < v.vlen; m++)
00431 {
00432 for (int n = 0; n <= m ; n++)
00433 {
00434 (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
00435 }
00436 }
00437 v.free_vector();
00438 }
00439 for (int m = 0; m < dim; m++)
00440 {
00441 for (int n = 0; n <= m ; n++)
00442 {
00443 (cov.matrix)[m*dim+n] /= num;
00444 }
00445 }
00446 for (int m = 0; m < dim-1; m++)
00447 {
00448 for (int n = m+1; n < dim; n++)
00449 {
00450 (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
00451 }
00452 }
00453 mean.destroy_vector();
00454 return cov;
00455 }