00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/machine/KernelMachine.h>
00013 #include <shogun/lib/Signal.h>
00014 #include <shogun/labels/RegressionLabels.h>
00015 #include <shogun/base/Parameter.h>
00016 #include <shogun/base/ParameterMap.h>
00017
00018 using namespace shogun;
00019
00020 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00021 struct S_THREAD_PARAM
00022 {
00023 CKernelMachine* kernel_machine;
00024 float64_t* result;
00025 int32_t start;
00026 int32_t end;
00027
00028
00029 index_t* indices;
00030 index_t indices_len;
00031 bool verbose;
00032 };
00033 #endif // DOXYGEN_SHOULD_SKIP_THIS
00034
/** Default constructor: no kernel and no model; all members are set to
 * their defaults in init(). */
CKernelMachine::CKernelMachine() : CMachine()
{
	init();
}
00039
00040 CKernelMachine::CKernelMachine(CKernel* k, SGVector<float64_t> alphas,
00041 SGVector<int32_t> svs, float64_t b) : CMachine()
00042 {
00043 init();
00044
00045 int32_t num_sv=svs.vlen;
00046 ASSERT(num_sv == alphas.vlen);
00047 create_new_model(num_sv);
00048 set_alphas(alphas);
00049 set_support_vectors(svs);
00050 set_kernel(kernel);
00051 set_bias(b);
00052 }
00053
00054 CKernelMachine::CKernelMachine(CKernelMachine* machine) : CMachine()
00055 {
00056 init();
00057
00058 SGVector<float64_t> alphas = machine->get_alphas().clone();
00059 SGVector<int32_t> svs = machine->get_support_vectors().clone();
00060 float64_t bias = machine->get_bias();
00061 CKernel* ker = machine->get_kernel();
00062
00063 int32_t num_sv = svs.vlen;
00064 create_new_model(num_sv);
00065 set_alphas(alphas);
00066 set_support_vectors(svs);
00067 set_bias(bias);
00068 set_kernel(ker);
00069 }
00070
/** Destructor: releases the references held on the kernel, the custom
 * kernel and the kernel backup used for data locking. */
CKernelMachine::~CKernelMachine()
{
	SG_UNREF(kernel);
	SG_UNREF(m_custom_kernel);
	SG_UNREF(m_kernel_backup);
}
00077
/** Set the kernel, taking a reference on it and releasing the reference on
 * any previously set kernel.
 *
 * @param k kernel to use (may be the kernel already set: the new reference
 *          is taken before the old one is dropped, so self-assignment is
 *          safe)
 */
void CKernelMachine::set_kernel(CKernel* k)
{
	SG_REF(k);
	SG_UNREF(kernel);
	kernel=k;
}
00084
/** Get the kernel.
 *
 * @return the kernel with an added reference; the caller is responsible
 *         for SG_UNREF'ing it (may be NULL if no kernel was set)
 */
CKernel* CKernelMachine::get_kernel()
{
	SG_REF(kernel);
	return kernel;
}
00090
/** Enable/disable batch computation (only effective if the kernel has the
 * KP_BATCHEVALUATION property, see apply_get_outputs()).
 *
 * @param enable true to allow kernel->compute_batch() based evaluation
 */
void CKernelMachine::set_batch_computation_enabled(bool enable)
{
	use_batch_computation=enable;
}
00095
/** @return whether batch computation is enabled */
bool CKernelMachine::get_batch_computation_enabled()
{
	return use_batch_computation;
}
00100
/** Enable/disable linadd optimization (used by kernels with the KP_LINADD
 * property, see init_kernel_optimization()).
 *
 * @param enable true to enable linadd optimization
 */
void CKernelMachine::set_linadd_enabled(bool enable)
{
	use_linadd=enable;
}
00105
/** @return whether linadd optimization is enabled */
bool CKernelMachine::get_linadd_enabled()
{
	return use_linadd;
}
00110
/** Enable/disable use of a bias term.
 *
 * @param enable_bias true if a bias term shall be used
 */
void CKernelMachine::set_bias_enabled(bool enable_bias)
{
	use_bias=enable_bias;
}
00115
/** @return whether a bias term is used */
bool CKernelMachine::get_bias_enabled()
{
	return use_bias;
}
00120
/** @return the bias term b of the model f(x)=sum_i alpha_i k(sv_i,x)+b */
float64_t CKernelMachine::get_bias()
{
	return m_bias;
}
00125
/** Set the bias term.
 *
 * @param bias new bias value
 */
void CKernelMachine::set_bias(float64_t bias)
{
	m_bias=bias;
}
00130
00131 int32_t CKernelMachine::get_support_vector(int32_t idx)
00132 {
00133 ASSERT(m_svs.vector && idx<m_svs.vlen);
00134 return m_svs.vector[idx];
00135 }
00136
00137 float64_t CKernelMachine::get_alpha(int32_t idx)
00138 {
00139 if (!m_alpha.vector)
00140 SG_ERROR("No alphas set\n");
00141 if (idx>=m_alpha.vlen)
00142 SG_ERROR("Alphas index (%d) out of range (%d)\n", idx, m_svs.vlen);
00143 return m_alpha.vector[idx];
00144 }
00145
00146 bool CKernelMachine::set_support_vector(int32_t idx, int32_t val)
00147 {
00148 if (m_svs.vector && idx<m_svs.vlen)
00149 m_svs.vector[idx]=val;
00150 else
00151 return false;
00152
00153 return true;
00154 }
00155
00156 bool CKernelMachine::set_alpha(int32_t idx, float64_t val)
00157 {
00158 if (m_alpha.vector && idx<m_alpha.vlen)
00159 m_alpha.vector[idx]=val;
00160 else
00161 return false;
00162
00163 return true;
00164 }
00165
/** @return number of support vectors in the model */
int32_t CKernelMachine::get_num_support_vectors()
{
	return m_svs.vlen;
}
00170
/** Set all support vector coefficients at once (shallow SGVector copy).
 *
 * @param alphas vector of coefficients
 */
void CKernelMachine::set_alphas(SGVector<float64_t> alphas)
{
	m_alpha = alphas;
}
00175
/** Set all support vector indices at once (shallow SGVector copy).
 *
 * @param svs vector of feature indices
 */
void CKernelMachine::set_support_vectors(SGVector<int32_t> svs)
{
	m_svs = svs;
}
00180
/** @return vector of support vector indices (shallow SGVector copy) */
SGVector<int32_t> CKernelMachine::get_support_vectors()
{
	return m_svs;
}
00185
/** @return vector of support vector coefficients (shallow SGVector copy) */
SGVector<float64_t> CKernelMachine::get_alphas()
{
	return m_alpha;
}
00190
00191 bool CKernelMachine::create_new_model(int32_t num)
00192 {
00193 m_alpha=SGVector<float64_t>();
00194 m_svs=SGVector<int32_t>();
00195
00196 m_bias=0;
00197
00198 if (num>0)
00199 {
00200 m_alpha= SGVector<float64_t>(num);
00201 m_svs= SGVector<int32_t>(num);
00202 return (m_alpha.vector!=NULL && m_svs.vector!=NULL);
00203 }
00204 else
00205 return true;
00206 }
00207
00208 bool CKernelMachine::init_kernel_optimization()
00209 {
00210 int32_t num_sv=get_num_support_vectors();
00211
00212 if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
00213 {
00214 int32_t * sv_idx = SG_MALLOC(int32_t, num_sv);
00215 float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);
00216
00217 for(int32_t i=0; i<num_sv; i++)
00218 {
00219 sv_idx[i] = get_support_vector(i) ;
00220 sv_weight[i] = get_alpha(i) ;
00221 }
00222
00223 bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight) ;
00224
00225 SG_FREE(sv_idx);
00226 SG_FREE(sv_weight);
00227
00228 if (!ret)
00229 SG_ERROR( "initialization of kernel optimization failed\n");
00230
00231 return ret;
00232 }
00233 else
00234 SG_ERROR( "initialization of kernel optimization failed\n");
00235
00236 return false;
00237 }
00238
/** Apply the machine and wrap the raw outputs as regression labels.
 *
 * @param data if non-NULL, the kernel rhs is re-initialized to it first
 * @return new CRegressionLabels; caller owns the returned reference
 */
CRegressionLabels* CKernelMachine::apply_regression(CFeatures* data)
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CRegressionLabels(outputs);
}
00244
/** Apply the machine and wrap the raw outputs as binary labels.
 *
 * @param data if non-NULL, the kernel rhs is re-initialized to it first
 * @return new CBinaryLabels; caller owns the returned reference
 */
CBinaryLabels* CKernelMachine::apply_binary(CFeatures* data)
{
	SGVector<float64_t> outputs = apply_get_outputs(data);
	return new CBinaryLabels(outputs);
}
00250
00251 SGVector<float64_t> CKernelMachine::apply_get_outputs(CFeatures* data)
00252 {
00253 SG_DEBUG("entering %s::apply_get_outputs(%s at %p)\n",
00254 get_name(), data ? data->get_name() : "NULL", data);
00255
00256 REQUIRE(kernel, "%s::apply_get_outputs(): No kernel assigned!\n");
00257
00258 if (!kernel->get_num_vec_lhs())
00259 {
00260 SG_ERROR("%s: No vectors on left hand side (%s). This is probably due to"
00261 " an implementation error in %s, where it was forgotten to set "
00262 "the data (m_svs) indices\n", get_name(),
00263 data->get_name());
00264 }
00265
00266 if (data)
00267 {
00268 CFeatures* lhs=kernel->get_lhs();
00269 REQUIRE(lhs, "%s::apply_get_outputs(): No left hand side specified\n",
00270 get_name());
00271 kernel->init(lhs, data);
00272 SG_UNREF(lhs);
00273 }
00274
00275
00276
00277
00278
00279
00280
00281
00282 CFeatures* rhs=kernel->get_rhs();
00283 int32_t num_vectors=rhs ? rhs->get_num_vectors() : kernel->get_num_vec_rhs();
00284 SG_UNREF(rhs)
00285
00286 SGVector<float64_t> output(num_vectors);
00287
00288 if (kernel->get_num_vec_rhs()>0)
00289 {
00290 SG_DEBUG( "computing output on %d test examples\n", num_vectors);
00291
00292 CSignal::clear_cancel();
00293
00294 if (io->get_show_progress())
00295 io->enable_progress();
00296 else
00297 io->disable_progress();
00298
00299 if (kernel->has_property(KP_BATCHEVALUATION) &&
00300 get_batch_computation_enabled())
00301 {
00302 output.zero();
00303 SG_DEBUG("Batch evaluation enabled\n");
00304 if (get_num_support_vectors()>0)
00305 {
00306 int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
00307 float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
00308 int32_t* idx=SG_MALLOC(int32_t, num_vectors);
00309
00310
00311 for (int32_t i=0; i<num_vectors; i++)
00312 idx[i]=i;
00313
00314 for (int32_t i=0; i<get_num_support_vectors(); i++)
00315 {
00316 sv_idx[i] = get_support_vector(i) ;
00317 sv_weight[i] = get_alpha(i) ;
00318 }
00319
00320 kernel->compute_batch(num_vectors, idx,
00321 output.vector, get_num_support_vectors(), sv_idx, sv_weight);
00322 SG_FREE(sv_idx);
00323 SG_FREE(sv_weight);
00324 SG_FREE(idx);
00325 }
00326
00327 for (int32_t i=0; i<num_vectors; i++)
00328 output[i] = get_bias() + output[i];
00329
00330 }
00331 else
00332 {
00333 int32_t num_threads=parallel->get_num_threads();
00334 ASSERT(num_threads>0);
00335
00336 if (num_threads < 2)
00337 {
00338 S_THREAD_PARAM params;
00339 params.kernel_machine=this;
00340 params.result = output.vector;
00341 params.start=0;
00342 params.end=num_vectors;
00343 params.verbose=true;
00344 params.indices = NULL;
00345 params.indices_len = 0;
00346 apply_helper((void*) ¶ms);
00347 }
00348 #ifdef HAVE_PTHREAD
00349 else
00350 {
00351 pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
00352 S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, num_threads);
00353 int32_t step= num_vectors/num_threads;
00354
00355 int32_t t;
00356
00357 for (t=0; t<num_threads-1; t++)
00358 {
00359 params[t].kernel_machine = this;
00360 params[t].result = output.vector;
00361 params[t].start = t*step;
00362 params[t].end = (t+1)*step;
00363 params[t].verbose = false;
00364 params[t].indices = NULL;
00365 params[t].indices_len = 0;
00366 pthread_create(&threads[t], NULL,
00367 CKernelMachine::apply_helper, (void*)¶ms[t]);
00368 }
00369
00370 params[t].kernel_machine = this;
00371 params[t].result = output.vector;
00372 params[t].start = t*step;
00373 params[t].end = num_vectors;
00374 params[t].verbose = true;
00375 params[t].indices = NULL;
00376 params[t].indices_len = 0;
00377 apply_helper((void*) ¶ms[t]);
00378
00379 for (t=0; t<num_threads-1; t++)
00380 pthread_join(threads[t], NULL);
00381
00382 SG_FREE(params);
00383 SG_FREE(threads);
00384 }
00385 #endif
00386 }
00387
00388 #ifndef WIN32
00389 if ( CSignal::cancel_computations() )
00390 SG_INFO( "prematurely stopped. \n");
00391 else
00392 #endif
00393 SG_DONE();
00394 }
00395
00396 SG_DEBUG("leaving %s::apply_get_outputs(%s at %p)\n",
00397 get_name(), data ? data->get_name() : "NULL", data);
00398
00399 return output;
00400 }
00401
00402 float64_t CKernelMachine::apply_one(int32_t num)
00403 {
00404 ASSERT(kernel);
00405
00406 if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
00407 {
00408 float64_t score = kernel->compute_optimized(num);
00409 return score+get_bias();
00410 }
00411 else
00412 {
00413 float64_t score=0;
00414 for(int32_t i=0; i<get_num_support_vectors(); i++)
00415 score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);
00416
00417 return score+get_bias();
00418 }
00419 }
00420
/** Thread entry point: evaluates apply_one() for the output positions
 * [start, end) described by the given S_THREAD_PARAM.
 *
 * @param p pointer to an S_THREAD_PARAM describing the work chunk
 * @return always NULL (pthread signature)
 */
void* CKernelMachine::apply_helper(void* p)
{
	S_THREAD_PARAM* params = (S_THREAD_PARAM*) p;
	float64_t* result = params->result;
	CKernelMachine* kernel_machine = params->kernel_machine;

#ifdef WIN32
	for (int32_t vec=params->start; vec<params->end; vec++)
#else
	/* on non-WIN32 platforms, stop early if computation was cancelled */
	for (int32_t vec=params->start; vec<params->end &&
			!CSignal::cancel_computations(); vec++)
#endif
	{
		if (params->verbose)
		{
			int32_t num_vectors=params->end - params->start;
			int32_t v=vec-params->start;
			/* report progress roughly every 1% of this chunk */
			if ( (v% (num_vectors/100+1))== 0)
				SG_SPROGRESS(v, 0.0, num_vectors-1);
		}

		/* locked-data case: 'indices' maps positions to example indices;
		 * otherwise the position itself is the example index */
		index_t idx=params->indices ? params->indices[vec] : vec;
		result[vec] = kernel_machine->apply_one(idx);
	}

	return NULL;
}
00449
/** Replace the kernel's lhs by a copy containing only the support vector
 * features, so the model remains usable after the original training data
 * is released. Afterwards the SV indices are re-numbered 0..num_sv-1 to
 * match the new, reduced lhs. */
void CKernelMachine::store_model_features()
{
	if (!kernel)
		SG_ERROR("kernel is needed to store SV features.\n");

	CFeatures* lhs=kernel->get_lhs();
	CFeatures* rhs=kernel->get_rhs();

	if (!lhs)
		SG_ERROR("kernel lhs is needed to store SV features.\n");

	/* copy just the SV rows out of the current lhs */
	CFeatures* sv_features=lhs->copy_subset(m_svs);
	SG_UNREF(lhs);

	/* re-init the kernel with the reduced lhs and the unchanged rhs */
	kernel->init(sv_features, rhs);

	/* drop the local reference obtained from get_rhs() */
	SG_UNREF(rhs);

	/* kernel->init() holds its own reference; release ours */
	SG_UNREF(sv_features);

	/* SV i now lives at row i of the new lhs */
	m_svs.range_fill();

}
00478
/** Train on a subset of the locked data, addressed by indices into the
 * precomputed custom kernel matrix. data_lock() must have been called
 * before.
 *
 * @param indices examples (rows/cols of the custom kernel) to train on
 * @return result of train_machine()
 */
bool CKernelMachine::train_locked(SGVector<index_t> indices)
{
	SG_DEBUG("entering %s::train_locked()\n", get_name());
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::train_locked() call data_lock() before!\n");

	/* data_lock() is expected to have installed the custom kernel */
	ASSERT(m_custom_kernel==kernel);

	/* enforce a single row subset by clearing the whole stack first;
	 * otherwise subsets would pile up across repeated train_locked()
	 * calls */
	m_custom_kernel->remove_all_row_subsets();

	/* restrict the kernel matrix to the training examples */
	m_custom_kernel->add_row_subset(indices);
	m_custom_kernel->add_col_subset(indices);

	/* restrict the labels to the same examples */
	m_labels->add_subset(indices);

	/* call train_machine() directly rather than train(): train() would
	 * store model features and unlock the data */
	bool result=train_machine();

	/* only the col subset is removed here; the row subset is deliberately
	 * left in place (presumably for subsequent apply_locked_* calls) and
	 * cleared at the top of the next train_locked() -- see above */
	m_custom_kernel->remove_col_subset();

	/* restore the full label set */
	m_labels->remove_subset();

	SG_DEBUG("leaving %s::train_locked()\n", get_name());
	return result;
}
00514
/** Apply the machine to a subset of the locked data and wrap the outputs
 * as binary labels.
 *
 * @param indices examples of the locked data to evaluate
 * @return new CBinaryLabels; caller owns the returned reference
 */
CBinaryLabels* CKernelMachine::apply_locked_binary(SGVector<index_t> indices)
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CBinaryLabels(outputs);
}
00520
/** Apply the machine to a subset of the locked data and wrap the outputs
 * as regression labels.
 *
 * @param indices examples of the locked data to evaluate
 * @return new CRegressionLabels; caller owns the returned reference
 */
CRegressionLabels* CKernelMachine::apply_locked_regression(
		SGVector<index_t> indices)
{
	SGVector<float64_t> outputs = apply_locked_get_output(indices);
	return new CRegressionLabels(outputs);
}
00527
/** Compute raw outputs for a subset of the locked data. Requires
 * data_lock() to have been called; evaluation goes through the precomputed
 * custom kernel, in parallel over pthreads when configured.
 *
 * @param indices examples of the locked data to evaluate
 * @return vector with one output per entry of indices, in the same order
 */
SGVector<float64_t> CKernelMachine::apply_locked_get_output(
		SGVector<index_t> indices)
{
	if (!is_data_locked())
		SG_ERROR("CKernelMachine::apply_locked() call data_lock() before!\n");

	/* data_lock() is expected to have installed the custom kernel */
	ASSERT(m_custom_kernel==kernel);

	int32_t num_inds=indices.vlen;
	SGVector<float64_t> output(num_inds);

	CSignal::clear_cancel();

	if (io->get_show_progress())
		io->enable_progress();
	else
		io->disable_progress();

	/* evaluate with one or more threads, mirroring apply_get_outputs() */
	int32_t num_threads=parallel->get_num_threads();
	ASSERT(num_threads>0);

	if (num_threads<2)
	{
		S_THREAD_PARAM params;
		params.kernel_machine=this;
		params.result=output.vector;

		/* single chunk covering all requested examples */
		params.start=0;
		params.end=num_inds;
		params.indices=indices.vector;
		params.indices_len=indices.vlen;

		params.verbose=true;
		apply_helper((void*) &params);
	}
#ifdef HAVE_PTHREAD
	else
	{
		pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
		S_THREAD_PARAM* params=SG_MALLOC(S_THREAD_PARAM, num_threads);
		int32_t step= num_inds/num_threads;

		int32_t t;
		for (t=0; t<num_threads-1; t++)
		{
			params[t].kernel_machine=this;
			params[t].result=output.vector;

			/* chunk t handles positions [t*step, (t+1)*step) */
			params[t].start=t*step;
			params[t].end=(t+1)*step;
			params[t].indices=indices.vector;
			params[t].indices_len=indices.vlen;

			params[t].verbose=false;
			pthread_create(&threads[t], NULL, CKernelMachine::apply_helper,
					(void*)&params[t]);
		}

		params[t].kernel_machine=this;
		params[t].result=output.vector;

		/* last chunk (with the remainder) runs on the calling thread and
		 * is the one printing progress */
		params[t].start=t*step;
		params[t].end=num_inds;
		params[t].indices=indices.vector;
		params[t].indices_len=indices.vlen;

		params[t].verbose=true;
		apply_helper((void*) &params[t]);

		for (t=0; t<num_threads-1; t++)
			pthread_join(threads[t], NULL);

		SG_FREE(params);
		SG_FREE(threads);
	}
#endif

#ifndef WIN32
	if ( CSignal::cancel_computations() )
		SG_INFO("prematurely stopped.\n");
	else
#endif
		SG_DONE();

	return output;
}
00619
/** Lock the machine onto the given data: the kernel matrix is precomputed
 * into a custom kernel, which temporarily replaces the kernel so that
 * train_locked()/apply_locked_*() can work on index subsets. The original
 * kernel is kept in m_kernel_backup and restored by data_unlock().
 *
 * @param labs labels to lock onto
 * @param features features to lock onto (used as both lhs and rhs)
 */
void CKernelMachine::data_lock(CLabels* labs, CFeatures* features)
{
	if ( !kernel )
		SG_ERROR("The kernel is not initialized\n");

	/* initialize the kernel with the data on both sides */
	kernel->init(features, features);

	/* back up the current kernel (replacing any stale backup) */
	SG_UNREF(m_kernel_backup)
	m_kernel_backup=kernel;
	SG_REF(m_kernel_backup);

	/* drop a possible custom kernel from an earlier lock */
	SG_UNREF(m_custom_kernel);

	/* precompute the full kernel matrix */
	m_custom_kernel=new CCustomKernel(kernel);
	SG_REF(m_custom_kernel);

	/* swap the custom kernel in as the active kernel */
	SG_UNREF(kernel);
	kernel=m_custom_kernel;
	SG_REF(kernel);

	/* let the base class do its part of the locking */
	CMachine::data_lock(labs, features);
}
00648
/** Undo data_lock(): release the custom kernel and restore the original
 * kernel from the backup taken in data_lock(). */
void CKernelMachine::data_unlock()
{
	SG_UNREF(m_custom_kernel);
	m_custom_kernel=NULL;

	/* restore the original kernel if one was backed up */
	if (m_kernel_backup)
	{
		/* drop the active kernel unless it is the backup itself */
		if (kernel!=m_kernel_backup)
			SG_UNREF(kernel);

		/* the backup's reference is transferred to 'kernel' */
		kernel=m_kernel_backup;
		m_kernel_backup=NULL;
	}

	/* let the base class do its part of the unlocking */
	CMachine::data_unlock();
}
00668
/** Initialize members to their defaults and register parameters for
 * serialization and model selection. Called from every constructor. */
void CKernelMachine::init()
{
	m_bias=0.0;
	kernel=NULL;
	m_custom_kernel=NULL;
	m_kernel_backup=NULL;
	use_batch_computation=true;
	use_linadd=true;
	use_bias=true;

	/* register parameters; the kernel is available for model selection */
	SG_ADD((CSGObject**) &kernel, "kernel", "", MS_AVAILABLE);
	SG_ADD((CSGObject**) &m_custom_kernel, "custom_kernel", "Custom kernel for"
			" data lock", MS_NOT_AVAILABLE);
	SG_ADD((CSGObject**) &m_kernel_backup, "kernel_backup",
			"Kernel backup for data lock", MS_NOT_AVAILABLE);
	SG_ADD(&use_batch_computation, "use_batch_computation",
			"Batch computation is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
	SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
	SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
	SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
			MS_NOT_AVAILABLE);
	SG_ADD(&m_svs, "m_svs", "Number of ``support vectors''.", MS_NOT_AVAILABLE);

	/* map the data-lock parameters, which were added in parameter
	 * version 1, to "not existing" in version 0 files */
	m_parameter_map->put(
			new SGParamInfo("custom_kernel", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
			new SGParamInfo()
	);
	m_parameter_map->put(
			new SGParamInfo("kernel_backup", CT_SCALAR, ST_NONE, PT_SGOBJECT, 1),
			new SGParamInfo()
	);
	m_parameter_map->finalize_map();
}