00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "features/DotFeatures.h"
00012 #include "lib/io.h"
00013 #include "lib/Signal.h"
00014 #include "base/Parallel.h"
00015 #include "base/Parameter.h"
00016
00017 #ifndef WIN32
00018 #include <pthread.h>
00019 #endif
00020
00021 using namespace shogun;
00022
00023 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00024 struct DF_THREAD_PARAM
00025 {
00026 CDotFeatures* df;
00027 int32_t* sub_index;
00028 float64_t* output;
00029 int32_t start;
00030 int32_t stop;
00031 float64_t* alphas;
00032 float64_t* vec;
00033 int32_t dim;
00034 float64_t bias;
00035 bool progress;
00036 };
00037 #endif // DOXYGEN_SHOULD_SKIP_THIS
00038
00039
00040 CDotFeatures::CDotFeatures(int32_t size)
00041 :CFeatures(size), combined_weight(1.0)
00042 {
00043 init();
00044 set_property(FP_DOT);
00045 }
00046
00047
00048 CDotFeatures::CDotFeatures(const CDotFeatures & orig)
00049 :CFeatures(orig), combined_weight(orig.combined_weight)
00050 {
00051 init();
00052 }
00053
00054
00055 CDotFeatures::CDotFeatures(CFile* loader)
00056 :CFeatures(loader)
00057 {
00058 init();
00059 }
00060
00061 void
00062 CDotFeatures::init(void)
00063 {
00064 m_parameters->add(&combined_weight, "combined_weight",
00065 "Feature weighting in combined dot features.");
00066 }
00067
00068 void CDotFeatures::dense_dot_range(float64_t* output, int32_t start, int32_t stop, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00069 {
00070 ASSERT(output);
00071
00072
00073 output-=start;
00074 ASSERT(start>=0);
00075 ASSERT(start<stop);
00076 ASSERT(stop<=get_num_vectors());
00077
00078 int32_t num_vectors=stop-start;
00079 ASSERT(num_vectors>0);
00080
00081 int32_t num_threads=parallel->get_num_threads();
00082 ASSERT(num_threads>0);
00083
00084 CSignal::clear_cancel();
00085
00086 #ifndef WIN32
00087 if (num_threads < 2)
00088 {
00089 #endif
00090 DF_THREAD_PARAM params;
00091 params.df=this;
00092 params.sub_index=NULL;
00093 params.output=output;
00094 params.start=start;
00095 params.stop=stop;
00096 params.alphas=alphas;
00097 params.vec=vec;
00098 params.dim=dim;
00099 params.bias=b;
00100 params.progress=false;
00101 dense_dot_range_helper((void*) ¶ms);
00102 #ifndef WIN32
00103 }
00104 else
00105 {
00106 pthread_t* threads = new pthread_t[num_threads-1];
00107 DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads];
00108 int32_t step= num_vectors/num_threads;
00109
00110 int32_t t;
00111
00112 for (t=0; t<num_threads-1; t++)
00113 {
00114 params[t].df = this;
00115 params[t].sub_index=NULL;
00116 params[t].output = output;
00117 params[t].start = start+t*step;
00118 params[t].stop = start+(t+1)*step;
00119 params[t].alphas=alphas;
00120 params[t].vec=vec;
00121 params[t].dim=dim;
00122 params[t].bias=b;
00123 params[t].progress = false;
00124 pthread_create(&threads[t], NULL,
00125 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00126 }
00127
00128 params[t].df = this;
00129 params[t].output = output;
00130 params[t].sub_index=NULL;
00131 params[t].start = start+t*step;
00132 params[t].stop = stop;
00133 params[t].alphas=alphas;
00134 params[t].vec=vec;
00135 params[t].dim=dim;
00136 params[t].bias=b;
00137 params[t].progress = false;
00138 dense_dot_range_helper((void*) ¶ms[t]);
00139
00140 for (t=0; t<num_threads-1; t++)
00141 pthread_join(threads[t], NULL);
00142
00143 delete[] params;
00144 delete[] threads;
00145 }
00146 #endif
00147
00148 #ifndef WIN32
00149 if ( CSignal::cancel_computations() )
00150 SG_INFO( "prematurely stopped. \n");
00151 #endif
00152 }
00153
00154 void CDotFeatures::dense_dot_range_subset(int32_t* sub_index, int32_t num, float64_t* output, float64_t* alphas, float64_t* vec, int32_t dim, float64_t b)
00155 {
00156 ASSERT(sub_index);
00157 ASSERT(output);
00158
00159 int32_t num_threads=parallel->get_num_threads();
00160 ASSERT(num_threads>0);
00161
00162 CSignal::clear_cancel();
00163
00164 #ifndef WIN32
00165 if (num_threads < 2)
00166 {
00167 #endif
00168 DF_THREAD_PARAM params;
00169 params.df=this;
00170 params.sub_index=sub_index;
00171 params.output=output;
00172 params.start=0;
00173 params.stop=num;
00174 params.alphas=alphas;
00175 params.vec=vec;
00176 params.dim=dim;
00177 params.bias=b;
00178 params.progress=false;
00179 dense_dot_range_helper((void*) ¶ms);
00180 #ifndef WIN32
00181 }
00182 else
00183 {
00184 pthread_t* threads = new pthread_t[num_threads-1];
00185 DF_THREAD_PARAM* params = new DF_THREAD_PARAM[num_threads];
00186 int32_t step= num/num_threads;
00187
00188 int32_t t;
00189
00190 for (t=0; t<num_threads-1; t++)
00191 {
00192 params[t].df = this;
00193 params[t].sub_index=sub_index;
00194 params[t].output = output;
00195 params[t].start = t*step;
00196 params[t].stop = (t+1)*step;
00197 params[t].alphas=alphas;
00198 params[t].vec=vec;
00199 params[t].dim=dim;
00200 params[t].bias=b;
00201 params[t].progress = false;
00202 pthread_create(&threads[t], NULL,
00203 CDotFeatures::dense_dot_range_helper, (void*)¶ms[t]);
00204 }
00205
00206 params[t].df = this;
00207 params[t].sub_index=sub_index;
00208 params[t].output = output;
00209 params[t].start = t*step;
00210 params[t].stop = num;
00211 params[t].alphas=alphas;
00212 params[t].vec=vec;
00213 params[t].dim=dim;
00214 params[t].bias=b;
00215 params[t].progress = false;
00216 dense_dot_range_helper((void*) ¶ms[t]);
00217
00218 for (t=0; t<num_threads-1; t++)
00219 pthread_join(threads[t], NULL);
00220
00221 delete[] params;
00222 delete[] threads;
00223 }
00224 #endif
00225
00226 #ifndef WIN32
00227 if ( CSignal::cancel_computations() )
00228 SG_INFO( "prematurely stopped. \n");
00229 #endif
00230 }
00231
00232 void* CDotFeatures::dense_dot_range_helper(void* p)
00233 {
00234 DF_THREAD_PARAM* par=(DF_THREAD_PARAM*) p;
00235 CDotFeatures* df=par->df;
00236 int32_t* sub_index=par->sub_index;
00237 float64_t* output=par->output;
00238 int32_t start=par->start;
00239 int32_t stop=par->stop;
00240 float64_t* alphas=par->alphas;
00241 float64_t* vec=par->vec;
00242 int32_t dim=par->dim;
00243 float64_t bias=par->bias;
00244 bool progress=par->progress;
00245
00246 if (sub_index)
00247 {
00248 #ifdef WIN32
00249 for (int32_t i=start; i<stop i++)
00250 #else
00251 for (int32_t i=start; i<stop &&
00252 !CSignal::cancel_computations(); i++)
00253 #endif
00254 {
00255 if (alphas)
00256 output[i]=alphas[sub_index[i]]*df->dense_dot(sub_index[i], vec, dim)+bias;
00257 else
00258 output[i]=df->dense_dot(sub_index[i], vec, dim)+bias;
00259 if (progress)
00260 df->display_progress(start, stop, i);
00261 }
00262
00263 }
00264 else
00265 {
00266 #ifdef WIN32
00267 for (int32_t i=start; i<stop i++)
00268 #else
00269 for (int32_t i=start; i<stop &&
00270 !CSignal::cancel_computations(); i++)
00271 #endif
00272 {
00273 if (alphas)
00274 output[i]=alphas[i]*df->dense_dot(i, vec, dim)+bias;
00275 else
00276 output[i]=df->dense_dot(i, vec, dim)+bias;
00277 if (progress)
00278 df->display_progress(start, stop, i);
00279 }
00280 }
00281
00282 return NULL;
00283 }
00284
00285 void CDotFeatures::get_feature_matrix(float64_t** dst, int32_t* num_feat, int32_t* num_vec)
00286 {
00287 int64_t offs=0;
00288 int32_t num=get_num_vectors();
00289 int32_t dim=get_dim_feature_space();
00290 ASSERT(num>0);
00291 ASSERT(dim>0);
00292
00293 int64_t sz=((uint64_t) num)* dim;
00294
00295 *num_feat=dim;
00296 *num_vec=num;
00297 *dst=new float64_t[sz];
00298 memset(*dst, 0, sz*sizeof(float64_t));
00299
00300 for (int32_t i=0; i<num; i++)
00301 {
00302 add_to_dense_vec(1.0, i, &((*dst)[offs]), dim);
00303 offs+=dim;
00304 }
00305 }
00306
00307 void CDotFeatures::get_feature_vector(float64_t** dst, int32_t* len, int32_t num)
00308 {
00309 int32_t dim=get_dim_feature_space();
00310 ASSERT(num>=0 && num<=num);
00311 ASSERT(dim>0);
00312
00313 *len=dim;
00314 *dst=new float64_t[dim];
00315 memset(*dst, 0, dim*sizeof(float64_t));
00316
00317 add_to_dense_vec(1.0, num, *dst, dim);
00318 }
00319
00320 void CDotFeatures::benchmark_add_to_dense_vector(int32_t repeats)
00321 {
00322 int32_t num=get_num_vectors();
00323 int32_t d=get_dim_feature_space();
00324 float64_t* w= new float64_t[d];
00325 CMath::fill_vector(w, d, 0.0);
00326
00327 CTime t;
00328 float64_t start_cpu=t.get_runtime();
00329 float64_t start_wall=t.get_curtime();
00330 for (int32_t r=0; r<repeats; r++)
00331 {
00332 for (int32_t i=0; i<num; i++)
00333 add_to_dense_vec(1.172343*(r+1), i, w, d);
00334 }
00335
00336 SG_PRINT("Time to process %d x num=%d add_to_dense_vector ops: cputime %fs walltime %fs\n",
00337 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00338 (t.get_curtime()-start_wall)/repeats);
00339
00340 delete[] w;
00341 }
00342
00343 void CDotFeatures::benchmark_dense_dot_range(int32_t repeats)
00344 {
00345 int32_t num=get_num_vectors();
00346 int32_t d=get_dim_feature_space();
00347 float64_t* w= new float64_t[d];
00348 float64_t* out= new float64_t[num];
00349 float64_t* alphas= new float64_t[num];
00350 CMath::range_fill_vector(w, d, 17.0);
00351 CMath::range_fill_vector(alphas, num, 1.2345);
00352
00353
00354
00355 CTime t;
00356 float64_t start_cpu=t.get_runtime();
00357 float64_t start_wall=t.get_curtime();
00358
00359 for (int32_t r=0; r<repeats; r++)
00360 dense_dot_range(out, 0, num, alphas, w, d, 23);
00361
00362 #ifdef DEBUG_DOTFEATURES
00363 CMath::display_vector(out, 40, "dense_dot_range");
00364 float64_t* out2= new float64_t[num];
00365
00366 for (int32_t r=0; r<repeats; r++)
00367 {
00368 CMath::fill_vector(out2, num, 0.0);
00369 for (int32_t i=0; i<num; i++)
00370 out2[i]+=dense_dot(i, w, d)*alphas[i]+23;
00371 }
00372 CMath::display_vector(out2, 40, "dense_dot");
00373 for (int32_t i=0; i<num; i++)
00374 out2[i]-=out[i];
00375 CMath::display_vector(out2, 40, "diff");
00376 #endif
00377 SG_PRINT("Time to process %d x num=%d dense_dot_range ops: cputime %fs walltime %fs\n",
00378 repeats, num, (t.get_runtime()-start_cpu)/repeats,
00379 (t.get_curtime()-start_wall)/repeats);
00380
00381 delete[] alphas;
00382 delete[] out;
00383 delete[] w;
00384 }