00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/features/DotFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/lib/Time.h>
00015 #include <shogun/mathematics/Math.h>
00016 #include <shogun/base/Parallel.h>
00017 #include <shogun/base/Parameter.h>
00018
00019 #ifdef HAVE_PTHREAD
00020 #include <pthread.h>
00021 #endif
00022
00023 using namespace shogun;
00024
00025 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00026 struct DF_THREAD_PARAM
00027 {
00028 CDotFeatures* df;
00029 int32_t* sub_index;
00030 float64_t* output;
00031 int32_t start;
00032 int32_t stop;
00033 float64_t* alphas;
00034 float64_t* vec;
00035 int32_t dim;
00036 float64_t bias;
00037 bool progress;
00038 };
00039 #endif // DOXYGEN_SHOULD_SKIP_THIS
00040
00041
00042 CDotFeatures::CDotFeatures(int32_t size)
00043 :CFeatures(size), combined_weight(1.0)
00044 {
00045 init();
00046 }
00047
00048
00049 CDotFeatures::CDotFeatures(const CDotFeatures & orig)
00050 :CFeatures(orig), combined_weight(orig.combined_weight)
00051 {
00052 init();
00053 }
00054
00055
00056 CDotFeatures::CDotFeatures(CFile* loader)
00057 :CFeatures(loader)
00058 {
00059 init();
00060 }
00061
00062 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00063 {
00064 ASSERT(output);
00065
00066
00067 output-=start;
00068 ASSERT(start>=0);
00069 ASSERT(start<stop);
00070 ASSERT(stop<=get_num_vectors());
00071
00072 int32_t num_vectors=stop-start;
00073 ASSERT(num_vectors>0);
00074
00075 int32_t num_threads=parallel->get_num_threads();
00076 ASSERT(num_threads>0);
00077
00078 CSignal::clear_cancel();
00079
00080 #ifdef HAVE_PTHREAD
00081 if (num_threads < 2)
00082 {
00083 #endif
00084 DF_THREAD_PARAM params;
00085 params.df=this;
00086 params.sub_index=NULL;
00087 params.output=output;
00088 params.start=start;
00089 params.stop=stop;
00090 params.alphas=alphas;
00091 params.vec=vec;
00092 params.dim=dim;
00093 params.bias=b;
00094 params.progress=false;
00095 dense_dot_range_helper((void*) ¶ms);
00096 #ifdef HAVE_PTHREAD
00097 }
00098 else
00099 {
00100 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00101 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00102 int32_t step= num_vectors/num_threads;
00103
00104 int32_t t;
00105
00106 for (t=0; t<num_threads-1; t++)
00107 {
00108 params[t].df = this;
00109 params[t].sub_index=NULL;
00110 params[t].output = output;
00111 params[t].start = start+t*step;
00112 params[t].stop = start+(t+1)*step;
00113 params[t].alphas=alphas;
00114 params[t].vec=vec;
00115 params[t].dim=dim;
00116 params[t].bias=b;
00117 params[t].progress = false;
00118 pthread_create(&threads[t], NULL,
00119 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00120 }
00121
00122 params[t].df = this;
00123 params[t].output = output;
00124 params[t].sub_index=NULL;
00125 params[t].start = start+t*step;
00126 params[t].stop = stop;
00127 params[t].alphas=alphas;
00128 params[t].vec=vec;
00129 params[t].dim=dim;
00130 params[t].bias=b;
00131 params[t].progress = false;
00132 dense_dot_range_helper((void*) ¶ms[t]);
00133
00134 for (t=0; t<num_threads-1; t++)
00135 pthread_join(threads[t], NULL);
00136
00137 SG_FREE(params);
00138 SG_FREE(threads);
00139 }
00140 #endif
00141
00142 #ifndef WIN32
00143 if ( CSignal::cancel_computations() )
00144 SG_INFO( "prematurely stopped. \n");
00145 #endif
00146 }
00147
00148 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00149 {
00150 ASSERT(sub_index);
00151 ASSERT(output);
00152
00153 int32_t num_threads=parallel->get_num_threads();
00154 ASSERT(num_threads>0);
00155
00156 CSignal::clear_cancel();
00157
00158 #ifdef HAVE_PTHREAD
00159 if (num_threads < 2)
00160 {
00161 #endif
00162 DF_THREAD_PARAM params;
00163 params.df=this;
00164 params.sub_index=sub_index;
00165 params.output=output;
00166 params.start=0;
00167 params.stop=num;
00168 params.alphas=alphas;
00169 params.vec=vec;
00170 params.dim=dim;
00171 params.bias=b;
00172 params.progress=false;
00173 dense_dot_range_helper((void*) ¶ms);
00174 #ifdef HAVE_PTHREAD
00175 }
00176 else
00177 {
00178 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00179 DF_THREAD_PARAM* params = SG_MALLOC(DF_THREAD_PARAM, num_threads);
00180 int32_t step= num/num_threads;
00181
00182 int32_t t;
00183
00184 for (t=0; t<num_threads-1; t++)
00185 {
00186 params[t].df = this;
00187 params[t].sub_index=sub_index;
00188 params[t].output = output;
00189 params[t].start = t*step;
00190 params[t].stop = (t+1)*step;
00191 params[t].alphas=alphas;
00192 params[t].vec=vec;
00193 params[t].dim=dim;
00194 params[t].bias=b;
00195 params[t].progress = false;
00196 pthread_create(&threads[t], NULL,
00197 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00198 }
00199
00200 params[t].df = this;
00201 params[t].sub_index=sub_index;
00202 params[t].output = output;
00203 params[t].start = t*step;
00204 params[t].stop = num;
00205 params[t].alphas=alphas;
00206 params[t].vec=vec;
00207 params[t].dim=dim;
00208 params[t].bias=b;
00209 params[t].progress = false;
00210 dense_dot_range_helper((void*) ¶ms[t]);
00211
00212 for (t=0; t<num_threads-1; t++)
00213 pthread_join(threads[t], NULL);
00214
00215 SG_FREE(params);
00216 SG_FREE(threads);
00217 }
00218 #endif
00219
00220 #ifndef WIN32
00221 if ( CSignal::cancel_computations() )
00222 SG_INFO( "prematurely stopped. \n");
00223 #endif
00224 }
00225
00226 void* CDotFeatures::dense_dot_range_helper(void* p)
00227 {
00228 DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
00229 CDotFeatures* df=par->df;
00230 int32_t* sub_index=par->sub_index;
00231 float64_t* output=par->output;
00232 int32_t start=par->start;
00233 int32_t stop=par->stop;
00234 float64_t* alphas=par->alphas;
00235 float64_t* vec=par->vec;
00236 int32_t dim=par->dim;
00237 float64_t bias=par->bias;
00238 bool progress=par->progress;
00239
00240 if (sub_index)
00241 {
00242 #ifdef WIN32
00243 for (int32_t i=start; i<stop i++)
00244 #else
00245 for (int32_t i=start; i<stop &&
00246 !CSignal::cancel_computations(); i++)
00247 #endif
00248 {
00249 if (alphas)
00250 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
00251 else
00252 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
00253 if (progress)
00254 df->display_progress(start, stop, i);
00255 }
00256
00257 }
00258 else
00259 {
00260 #ifdef WIN32
00261 for (int32_t i=start; i<stop i++)
00262 #else
00263 for (int32_t i=start; i<stop &&
00264 !CSignal::cancel_computations(); i++)
00265 #endif
00266 {
00267 if (alphas)
00268 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
00269 else
00270 output[i]=df->dense_dot(i, vec, dim)+bias;
00271 if (progress)
00272 df->display_progress(start, stop, i);
00273 }
00274 }
00275
00276 return NULL;
00277 }
00278
00279 SGMatrix<float64_t> CDotFeatures::get_computed_dot_feature_matrix()
00280 {
00281 SGMatrix<float64_t> m;
00282
00283 int64_t offs=0;
00284 int32_t num=get_num_vectors();
00285 int32_t dim=get_dim_feature_space();
00286 ASSERT(num>0);
00287 ASSERT(dim>0);
00288
00289 int64_t sz=((uint64_t) num)* dim;
00290
00291 m.do_free=true;
00292 m.num_cols=dim;
00293 m.num_rows=num;
00294 m.matrix=SG_MALLOC(float64_t, sz);
00295 memset(m.matrix, 0, sz*sizeof(float64_t));
00296
00297 for (int32_t i=0; i<num; i++)
00298 {
00299 add_to_dense_vec(1.0, i, &(m.matrix[offs]), dim);
00300 offs+=dim;
00301 }
00302
00303 return m;
00304 }
00305
00306 SGVector<float64_t> CDotFeatures::get_computed_dot_feature_vector(int32_t num)
00307 {
00308 SGVector<float64_t> v;
00309
00310 int32_t dim=get_dim_feature_space();
00311 ASSERT(num>=0 && num<=get_num_vectors());
00312 ASSERT(dim>0);
00313
00314 v.do_free=true;
00315 v.vlen=dim;
00316 v.vector=SG_MALLOC(float64_t, dim);
00317 memset(v.vector, 0, dim*sizeof(float64_t));
00318
00319 add_to_dense_vec(1.0, num, v.vector, dim);
00320 return v;
00321 }
00322
00323 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
00324 {
00325 int32_t num=get_num_vectors();
00326 int32_t d=get_dim_feature_space();
00327 float64_t* w= SG_MALLOC(float64_t, d);
00328 CMath::fill_vector(w, d, 0.0);
00329
00330 CTime t;
00331 float64_t start_cpu=t.get_runtime();
00332 float64_t start_wall=t.get_curtime();
00333 for (int32_t r=0; r<repeats; r++)
00334 {
00335 for (int32_t i=0; i<num; i++)
00336 add_to_dense_vec(1.172343*(r+1), i, w, d);
00337 }
00338
00339 SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
00340 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00341 (t.get_curtime()-start_wall)/repeats);
00342
00343 SG_FREE(w);
00344 }
00345
00346 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
00347 {
00348 int32_t num=get_num_vectors();
00349 int32_t d=get_dim_feature_space();
00350 float64_t* w= SG_MALLOC(float64_t, d);
00351 float64_t* out= SG_MALLOC(float64_t, num);
00352 float64_t* alphas= SG_MALLOC(float64_t, num);
00353 CMath::range_fill_vector(w, d, 17.0);
00354 CMath::range_fill_vector(alphas, num, 1.2345);
00355
00356
00357
00358 CTime t;
00359 float64_t start_cpu=t.get_runtime();
00360 float64_t start_wall=t.get_curtime();
00361
00362 for (int32_t r=0; r<repeats; r++)
00363 dense_dot_range(out, 0, num, alphas, w, d, 23);
00364
00365 #ifdef DEBUG_DOTFEATURES
00366 CMath::display_vector(out, 40, "dense_dot_range");
00367 float64_t* out2= SG_MALLOC(float64_t, num);
00368
00369 for (int32_t r=0; r<repeats; r++)
00370 {
00371 CMath::fill_vector(out2, num, 0.0);
00372 for (int32_t i=0; i<num; i++)
00373 out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
00374 }
00375 CMath::display_vector(out2, 40, "dense_dot");
00376 for (int32_t i=0; i<num; i++)
00377 out2[i]-=out[i];
00378 CMath::display_vector(out2, 40, "diff");
00379 #endif
00380 SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
00381 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00382 (t.get_curtime()-start_wall)/repeats);
00383
00384 SG_FREE(alphas);
00385 SG_FREE(out);
00386 SG_FREE(w);
00387 }
00388
00389 SGVector<float64_t> CDotFeatures::get_mean()
00390 {
00391 int32_t num=get_num_vectors();
00392 int32_t dim=get_dim_feature_space();
00393 ASSERT(num>0);
00394 ASSERT(dim>0);
00395
00396 SGVector<float64_t> mean(dim);
00397 memset(mean.vector, 0, sizeof(float64_t)*dim);
00398
00399 for (int i = 0; i < num; i++)
00400 add_to_dense_vec(1, i, mean.vector, dim);
00401 for (int j = 0; j < dim; j++)
00402 mean.vector[j] /= num;
00403
00404 return mean;
00405 }
00406
00407 SGMatrix<float64_t> CDotFeatures::get_cov()
00408 {
00409 int32_t num=get_num_vectors();
00410 int32_t dim=get_dim_feature_space();
00411 ASSERT(num>0);
00412 ASSERT(dim>0);
00413
00414 SGMatrix<float64_t> cov(dim, dim);
00415
00416 memset(cov.matrix, 0, sizeof(float64_t)*dim*dim);
00417
00418 SGVector<float64_t> mean = get_mean();
00419
00420 for (int i = 0; i < num; i++)
00421 {
00422 SGVector<float64_t> v = get_computed_dot_feature_vector(i);
00423 CMath::add<float64_t>(v.vector, 1, v.vector, -1, mean.vector, v.vlen);
00424 for (int m = 0; m < v.vlen; m++)
00425 {
00426 for (int n = 0; n <= m ; n++)
00427 {
00428 (cov.matrix)[m*v.vlen+n] += v.vector[m]*v.vector[n];
00429 }
00430 }
00431 v.free_vector();
00432 }
00433 for (int m = 0; m < dim; m++)
00434 {
00435 for (int n = 0; n <= m ; n++)
00436 {
00437 (cov.matrix)[m*dim+n] /= num;
00438 }
00439 }
00440 for (int m = 0; m < dim-1; m++)
00441 {
00442 for (int n = m+1; n < dim; n++)
00443 {
00444 (cov.matrix)[m*dim+n] = (cov.matrix)[n*dim+m];
00445 }
00446 }
00447 mean.destroy_vector();
00448 return cov;
00449 }
00450
00451 void CDotFeatures::display_progress(int32_t start, int32_t stop, int32_t v)
00452 {
00453 int32_t num_vectors=stop-start;
00454 int32_t i=v-start;
00455
00456 if ( (i% (num_vectors/100+1))== 0)
00457 SG_PROGRESS(v, 0.0, num_vectors-1);
00458 }
00459
00460 void CDotFeatures::init()
00461 {
00462 set_property(FP_DOT);
00463 m_parameters->add(&combined_weight, "combined_weight",
00464 "Feature weighting in combined dot features.");
00465 }