KernelMachine.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/machine/KernelMachine.h>
#include <shogun/lib/Signal.h>
#include <shogun/base/Parameter.h>

using namespace shogun;

#ifndef DOXYGEN_SHOULD_SKIP_THIS
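/* Parameters handed to each worker thread spawned by apply(): the machine to
 * evaluate, the label container to fill, the half-open index range
 * [start, end) of examples to process, and whether this thread reports
 * progress. */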
struct S_THREAD_PARAM
{
    CKernelMachine* kernel_machine;
    CLabels* result;
    int32_t start;
    int32_t end;
    bool verbose;
};
#endif // DOXYGEN_SHOULD_SKIP_THIS

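/* Default constructor: register the kernel, the linadd/batch/bias switches,
 * the bias, the alpha coefficients and the support vector indices as
 * parameters, and initialize the bias to zero. */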
CKernelMachine::CKernelMachine()
: CMachine(), kernel(NULL), use_batch_computation(true), use_linadd(true), use_bias(true)
{
    SG_ADD((CSGObject**) &kernel, "kernel", "The kernel.", MS_AVAILABLE);
    SG_ADD(&use_batch_computation, "use_batch_computation",
            "Batch computation is enabled.", MS_NOT_AVAILABLE);
    SG_ADD(&use_linadd, "use_linadd", "Linadd is enabled.", MS_NOT_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Bias shall be used.", MS_NOT_AVAILABLE);
    SG_ADD(&m_bias, "m_bias", "Bias term.", MS_NOT_AVAILABLE);
    SG_ADD(&m_alpha, "m_alpha", "Array of coefficients alpha.",
            MS_NOT_AVAILABLE);
    SG_ADD(&m_svs, "m_svs", "Indices of ``support vectors''.", MS_NOT_AVAILABLE);

    m_bias=0.0;
}

CKernelMachine::~CKernelMachine()
{
    SG_UNREF(kernel);

    SG_FREE(m_alpha.vector);
    SG_FREE(m_svs.vector);
}

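/* Set up the kernel's linear-term (linadd) optimization from the current
 * support vectors and their alpha coefficients. Requires a kernel with the
 * KP_LINADD property and at least one support vector. */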
bool CKernelMachine::init_kernel_optimization()
{
    int32_t num_sv=get_num_support_vectors();

    if (kernel && kernel->has_property(KP_LINADD) && num_sv>0)
    {
        int32_t* sv_idx      = SG_MALLOC(int32_t, num_sv);
        float64_t* sv_weight = SG_MALLOC(float64_t, num_sv);

        for (int32_t i=0; i<num_sv; i++)
        {
            sv_idx[i]    = get_support_vector(i);
            sv_weight[i] = get_alpha(i);
        }

        bool ret = kernel->init_optimization(num_sv, sv_idx, sv_weight);

        SG_FREE(sv_idx);
        SG_FREE(sv_weight);

        if (!ret)
            SG_ERROR("initialization of kernel optimization failed\n");

        return ret;
    }
    else
        SG_ERROR("initialization of kernel optimization failed: a kernel with the LINADD property and at least one support vector are required\n");

    return false;
}

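/* Compute outputs for all examples on the kernel's right-hand side: uses the
 * kernel's batch evaluation if available and enabled, otherwise evaluates
 * example by example, in parallel when several threads are configured. */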
CLabels* CKernelMachine::apply()
{
    CLabels* lab=NULL;

    if (!kernel)
        SG_ERROR("KernelMachine cannot proceed without a kernel!\n");

    if (kernel && kernel->get_num_vec_rhs()>0)
    {
        int32_t num_vectors=kernel->get_num_vec_rhs();

        lab=new CLabels(num_vectors);
        SG_DEBUG("computing output on %d test examples\n", num_vectors);

        CSignal::clear_cancel();

        if (io->get_show_progress())
            io->enable_progress();
        else
            io->disable_progress();

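        /* batch evaluation: the kernel computes all outputs in one call over
         * the full set of support vectors */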
        if (kernel->has_property(KP_BATCHEVALUATION) &&
                get_batch_computation_enabled())
        {
            float64_t* output=SG_MALLOC(float64_t, num_vectors);
            memset(output, 0, sizeof(float64_t)*num_vectors);

            if (get_num_support_vectors()>0)
            {
                int32_t* sv_idx=SG_MALLOC(int32_t, get_num_support_vectors());
                float64_t* sv_weight=SG_MALLOC(float64_t, get_num_support_vectors());
                int32_t* idx=SG_MALLOC(int32_t, num_vectors);

                // compute output for all vectors v[0]...v[num_vectors-1]
                for (int32_t i=0; i<num_vectors; i++)
                    idx[i]=i;

                for (int32_t i=0; i<get_num_support_vectors(); i++)
                {
                    sv_idx[i]    = get_support_vector(i);
                    sv_weight[i] = get_alpha(i);
                }

                kernel->compute_batch(num_vectors, idx,
                        output, get_num_support_vectors(), sv_idx, sv_weight);
                SG_FREE(sv_idx);
                SG_FREE(sv_weight);
                SG_FREE(idx);
            }

            for (int32_t i=0; i<num_vectors; i++)
                lab->set_label(i, get_bias()+output[i]);

            SG_FREE(output);
        }
        else
        {
            int32_t num_threads=parallel->get_num_threads();
            ASSERT(num_threads>0);

            if (num_threads < 2)
            {
                S_THREAD_PARAM params;
                params.kernel_machine=this;
                params.result=lab;
                params.start=0;
                params.end=num_vectors;
                params.verbose=true;
                apply_helper((void*) &params);
            }
#ifdef HAVE_PTHREAD
            else
            {
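                /* spawn num_threads-1 worker threads, each classifying a
                 * contiguous chunk of step examples */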
                pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
                S_THREAD_PARAM* params = SG_MALLOC(S_THREAD_PARAM, num_threads);
                int32_t step = num_vectors/num_threads;

                int32_t t;

                for (t=0; t<num_threads-1; t++)
                {
                    params[t].kernel_machine = this;
                    params[t].result = lab;
                    params[t].start = t*step;
                    params[t].end = (t+1)*step;
                    params[t].verbose = false;
                    pthread_create(&threads[t], NULL,
                            CKernelMachine::apply_helper, (void*)&params[t]);
                }

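                /* the calling thread processes the remaining examples
                 * [t*step, num_vectors) itself and reports progress */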
                params[t].kernel_machine = this;
                params[t].result = lab;
                params[t].start = t*step;
                params[t].end = num_vectors;
                params[t].verbose = true;
                apply_helper((void*) &params[t]);

                for (t=0; t<num_threads-1; t++)
                    pthread_join(threads[t], NULL);

                SG_FREE(params);
                SG_FREE(threads);
            }
#endif
        }

#ifndef WIN32
        if (CSignal::cancel_computations())
            SG_INFO("prematurely stopped.           \n");
        else
#endif
            SG_DONE();
    }
    else
        return NULL;

    return lab;
}

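/* Compute the output for a single example:
 * f(x_num) = sum_i alpha_i * k(x_sv_i, x_num) + b, using the kernel's
 * precomputed linadd optimization when it has been initialized. */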
float64_t CKernelMachine::apply(int32_t num)
{
    ASSERT(kernel);

    if (kernel->has_property(KP_LINADD) && (kernel->get_is_initialized()))
    {
        float64_t score = kernel->compute_optimized(num);
        return score+get_bias();
    }
    else
    {
        float64_t score=0;
        for (int32_t i=0; i<get_num_support_vectors(); i++)
            score+=kernel->kernel(get_support_vector(i), num)*get_alpha(i);

        return score+get_bias();
    }
}

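/* Apply the machine to new data: re-initialize the kernel with the stored
 * left-hand side (training/SV) features and the given data as right-hand
 * side, then compute outputs for all examples. */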
CLabels* CKernelMachine::apply(CFeatures* data)
{
    if (!kernel)
        SG_ERROR("No kernel assigned!\n");

    CFeatures* lhs=kernel->get_lhs();
    if (!lhs || !lhs->get_num_vectors())
    {
        SG_UNREF(lhs);
        SG_ERROR("No vectors on left hand side\n");
    }
    kernel->init(lhs, data);
    SG_UNREF(lhs);

    return apply();
}

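/* Thread entry point: compute labels for the examples in [params->start,
 * params->end), optionally printing progress. */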
void* CKernelMachine::apply_helper(void* p)
{
    S_THREAD_PARAM* params= (S_THREAD_PARAM*) p;
    CLabels* result=params->result;
    CKernelMachine* kernel_machine=params->kernel_machine;

#ifdef WIN32
    for (int32_t vec=params->start; vec<params->end; vec++)
#else
    for (int32_t vec=params->start; vec<params->end &&
            !CSignal::cancel_computations(); vec++)
#endif
    {
        if (params->verbose)
        {
            int32_t num_vectors=params->end - params->start;
            int32_t v=vec-params->start;
            if ((v % (num_vectors/100+1)) == 0)
                SG_SPROGRESS(v, 0.0, num_vectors-1);
        }

        result->set_label(vec, kernel_machine->apply(vec));
    }

    return NULL;
}

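/* Make the machine independent of the training data: replace the kernel's
 * left-hand side by a copy of only the support vector features and renumber
 * the stored support vector indices to 0...num_sv-1. */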
void CKernelMachine::store_model_features()
{
    if (!kernel)
        SG_ERROR("kernel is needed to store SV features.\n");

    CFeatures* lhs=kernel->get_lhs();
    CFeatures* rhs=kernel->get_rhs();

    if (!lhs)
        SG_ERROR("kernel lhs is needed to store SV features.\n");

    /* copy sv feature data */
    CFeatures* sv_features=lhs->copy_subset(m_svs);
    SG_UNREF(lhs);

    /* now sv indices are just the identity */
    CMath::range_fill_vector(m_svs.vector, m_svs.vlen, 0);

    /* set new lhs to kernel */
    kernel->init(sv_features, rhs);

    SG_UNREF(rhs);
}