GUIKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2008 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/ui/SGInterface.h>
00013 #include <shogun/ui/GUIKernel.h>
00014 #include <shogun/ui/GUIPluginEstimate.h>
00015 
00016 #include <shogun/lib/config.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/io/AsciiFile.h>
00019 #include <shogun/kernel/Kernel.h>
00020 #include <shogun/kernel/CombinedKernel.h>
00021 #include <shogun/kernel/Chi2Kernel.h>
00022 #include <shogun/kernel/LinearKernel.h>
00023 #include <shogun/kernel/LinearStringKernel.h>
00024 #include <shogun/kernel/WeightedDegreeStringKernel.h>
00025 #include <shogun/kernel/WeightedDegreeRBFKernel.h>
00026 #include <shogun/kernel/SpectrumMismatchRBFKernel.h>
00027 #include <shogun/kernel/WeightedDegreePositionStringKernel.h>
00028 #include <shogun/kernel/FixedDegreeStringKernel.h>
00029 #include <shogun/kernel/LocalityImprovedStringKernel.h>
00030 #include <shogun/kernel/SimpleLocalityImprovedStringKernel.h>
00031 #include <shogun/kernel/PolyKernel.h>
00032 #include <shogun/kernel/CustomKernel.h>
00033 #include <shogun/kernel/ConstKernel.h>
00034 #include <shogun/kernel/PolyMatchWordStringKernel.h>
00035 #include <shogun/kernel/PolyMatchStringKernel.h>
00036 #include <shogun/kernel/LocalAlignmentStringKernel.h>
00037 #include <shogun/kernel/MatchWordStringKernel.h>
00038 #include <shogun/kernel/CommWordStringKernel.h>
00039 #include <shogun/kernel/WeightedCommWordStringKernel.h>
00040 #include <shogun/kernel/CommUlongStringKernel.h>
00041 #include <shogun/kernel/HistogramWordStringKernel.h>
00042 #include <shogun/kernel/SalzbergWordStringKernel.h>
00043 #include <shogun/kernel/GaussianKernel.h>
00044 #include <shogun/kernel/GaussianShiftKernel.h>
00045 #include <shogun/kernel/SigmoidKernel.h>
00046 #include <shogun/kernel/DiagKernel.h>
00047 #include <shogun/kernel/OligoStringKernel.h>
00048 #include <shogun/kernel/DistanceKernel.h>
00049 #include <shogun/kernel/TensorProductPairKernel.h>
00050 #include <shogun/kernel/AvgDiagKernelNormalizer.h>
00051 #include <shogun/kernel/RidgeKernelNormalizer.h>
00052 #include <shogun/kernel/FirstElementKernelNormalizer.h>
00053 #include <shogun/kernel/IdentityKernelNormalizer.h>
00054 #include <shogun/kernel/SqrtDiagKernelNormalizer.h>
00055 #include <shogun/kernel/VarianceKernelNormalizer.h>
00056 #include <shogun/kernel/ScatterKernelNormalizer.h>
00057 #include <shogun/classifier/svm/SVM.h>
00058 #include <shogun/kernel/ZeroMeanCenterKernelNormalizer.h>
00059 #include <shogun/kernel/WaveletKernel.h>
00060 
00061 #include <string.h>
00062 
00063 using namespace shogun;
00064 
00065 CGUIKernel::CGUIKernel(CSGInterface* ui_)
00066 : CSGObject(), ui(ui_)
00067 {
00068     kernel=NULL;
00069 }
00070 
00071 CGUIKernel::~CGUIKernel()
00072 {
00073     SG_UNREF(kernel);
00074 }
00075 
00076 CKernel* CGUIKernel::get_kernel()
00077 {
00078     return kernel;
00079 }
00080 
00081 CKernel* CGUIKernel::create_oligo(int32_t size, int32_t k, float64_t width)
00082 {
00083     CKernel* kern=new COligoStringKernel(size, k, width);
00084     SG_DEBUG("created OligoStringKernel (%p) with size %d, k %d, width %f.\n", kern, size, k, width);
00085 
00086     return kern;
00087 }
00088 
00089 CKernel* CGUIKernel::create_diag(int32_t size, float64_t diag)
00090 {
00091     CKernel* kern=new CDiagKernel(size, diag);
00092     if (!kern)
00093         SG_ERROR("Couldn't create DiagKernel with size %d, diag %f.\n", size, diag);
00094     else
00095         SG_DEBUG("created DiagKernel (%p) with size %d, diag %f.\n", kern, size, diag);
00096 
00097     return kern;
00098 }
00099 
00100 CKernel* CGUIKernel::create_const(int32_t size, float64_t c)
00101 {
00102     CKernel* kern=new CConstKernel(c);
00103     if (!kern)
00104         SG_ERROR("Couldn't create ConstKernel with c %f.\n", c);
00105     else
00106         SG_DEBUG("created ConstKernel (%p) with c %f.\n", kern, c);
00107 
00108     kern->set_cache_size(size);
00109 
00110     return kern;
00111 }
00112 
00113 CKernel* CGUIKernel::create_custom(float64_t* kmatrix, int32_t num_feat, int32_t num_vec, bool source_is_diag, bool dest_is_diag)
00114 {
00115     CCustomKernel* kern=new CCustomKernel();
00116     SG_DEBUG("created CustomKernel (%p).\n", kern);
00117 
00118     SGMatrix<float64_t> km=SGMatrix<float64_t>(kmatrix, num_feat, num_vec);
00119 
00120     if (source_is_diag && dest_is_diag && num_feat==1)
00121     {
00122         kern->set_triangle_kernel_matrix_from_triangle(
00123                 SGVector<float64_t>(kmatrix, num_vec));
00124     }
00125     else if (!source_is_diag && dest_is_diag && num_vec==num_feat)
00126         kern->set_triangle_kernel_matrix_from_full(km);
00127     else
00128         kern->set_full_kernel_matrix_from_full(km);
00129 
00130     SG_FREE(kmatrix);
00131     return kern;
00132 }
00133 
00134 
00135 CKernel* CGUIKernel::create_gaussianshift(
00136     int32_t size, float64_t width, int32_t max_shift, int32_t shift_step)
00137 {
00138     CKernel* kern=new CGaussianShiftKernel(size, width, max_shift, shift_step);
00139     if (!kern)
00140         SG_ERROR("Couldn't create GaussianShiftKernel with size %d, width %f, max_shift %d, shift_step %d.\n", size, width, max_shift, shift_step);
00141     else
00142         SG_DEBUG("created GaussianShiftKernel (%p) with size %d, width %f, max_shift %d, shift_step %d.\n", kern, size, width, max_shift, shift_step);
00143 
00144     return kern;
00145 }
00146 
00147 CKernel* CGUIKernel::create_sparsegaussian(int32_t size, float64_t width)
00148 {
00149     CKernel* kern=new CGaussianKernel(size, width);
00150     if (!kern)
00151         SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width);
00152     else
00153         SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width);
00154 
00155     return kern;
00156 }
00157 
00158 CKernel* CGUIKernel::create_gaussian(int32_t size, float64_t width)
00159 {
00160     CKernel* kern=new CGaussianKernel(size, width);
00161     if (!kern)
00162         SG_ERROR("Couldn't create GaussianKernel with size %d, width %f.\n", size, width);
00163     else
00164         SG_DEBUG("created GaussianKernel (%p) with size %d, width %f.\n", kern, size, width);
00165 
00166     return kern;
00167 }
00168 
00169 CKernel* CGUIKernel::create_sigmoid(
00170     int32_t size, float64_t gamma, float64_t coef0)
00171 {
00172     CKernel* kern=new CSigmoidKernel(size, gamma, coef0);
00173     if (!kern)
00174         SG_ERROR("Couldn't create SigmoidKernel with size %d, gamma %f, coef0 %f.\n", size, gamma, coef0);
00175     else
00176         SG_DEBUG("created SigmoidKernel (%p) with size %d, gamma %f, coef0 %f.\n", kern, size, gamma, coef0);
00177 
00178     return kern;
00179 }
00180 CKernel* CGUIKernel::create_wavelet(
00181     int32_t size, float64_t Wdilation, float64_t Wtranslation)
00182 {
00183     CKernel* kern=new CWaveletKernel(size, Wdilation, Wtranslation);
00184     if (!kern)
00185         SG_ERROR("Couldn't create WaveletKernel with size %d, Wdilation %f, Wtranslation %f.\n", size, Wdilation, Wtranslation);
00186     else
00187         SG_DEBUG("created WaveletKernel (%p) with size %d, Wdilation %f, Wtranslation %f.\n", kern, size, Wdilation, Wtranslation);
00188     
00189     return kern;
00190 }
00191 CKernel* CGUIKernel::create_sparsepoly(
00192     int32_t size, int32_t degree, bool inhomogene, bool normalize)
00193 {
00194     CKernel* kern=new CPolyKernel(size, degree, inhomogene);
00195     if (!normalize)
00196         kern->set_normalizer(new CIdentityKernelNormalizer());
00197     SG_DEBUG("created PolyKernel with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize);
00198 
00199     return kern;
00200 }
00201 
00202 CKernel* CGUIKernel::create_poly(
00203     int32_t size, int32_t degree, bool inhomogene, bool normalize)
00204 {
00205     CKernel* kern=new CPolyKernel(size, degree, inhomogene);
00206     if (!normalize)
00207         kern->set_normalizer(new CIdentityKernelNormalizer());
00208     SG_DEBUG("created PolyKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize);
00209 
00210     return kern;
00211 }
00212 
00213 CKernel* CGUIKernel::create_localityimprovedstring(
00214     int32_t size, int32_t length, int32_t inner_degree, int32_t outer_degree,
00215     EKernelType ktype)
00216 {
00217     CKernel* kern=NULL;
00218 
00219     if (ktype==K_SIMPLELOCALITYIMPROVED)
00220     {
00221         kern=new CSimpleLocalityImprovedStringKernel(
00222             size, length, inner_degree, outer_degree);
00223     }
00224     else if (ktype==K_LOCALITYIMPROVED)
00225     {
00226         kern=new CLocalityImprovedStringKernel(
00227             size, length, inner_degree, outer_degree);
00228     }
00229 
00230     if (!kern)
00231         SG_ERROR("Couldn't create (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", size, length, inner_degree, outer_degree);
00232     else
00233         SG_DEBUG("created (Simple)LocalityImprovedStringKernel with size %d, length %d, inner_degree %d, outer_degree %d.\n", kern, size, length, inner_degree, outer_degree);
00234 
00235     return kern;
00236 }
00237 
00238 CKernel* CGUIKernel::create_weighteddegreestring(
00239     int32_t size, int32_t order, int32_t max_mismatch, bool use_normalization,
00240     int32_t mkl_stepsize, bool block_computation, int32_t single_degree)
00241 {
00242     float64_t* weights=get_weights(order, max_mismatch);
00243 
00244     int32_t i=0;
00245     if (single_degree>=0)
00246     {
00247         ASSERT(single_degree<order);
00248         for (i=0; i<order; i++)
00249         {
00250             if (i!=single_degree)
00251                 weights[i]=0;
00252             else
00253                 weights[i]=1;
00254         }
00255     }
00256 
00257     CKernel* kern=new CWeightedDegreeStringKernel(weights, order);
00258 
00259     SG_DEBUG("created WeightedDegreeStringKernel (%p) with size %d, order %d, "
00260             "max_mismatch %d, use_normalization %d, mkl_stepsize %d, "
00261             "block_computation %d, single_degree %d.\n",
00262             kern, size, order, max_mismatch, (int) use_normalization, mkl_stepsize,
00263             block_computation, single_degree);
00264 
00265     if (!use_normalization)
00266         kern->set_normalizer(new CIdentityKernelNormalizer());
00267         
00268     ((CWeightedDegreeStringKernel*) kern)->
00269         set_use_block_computation(block_computation);
00270     ((CWeightedDegreeStringKernel*) kern)->set_max_mismatch(max_mismatch);
00271     ((CWeightedDegreeStringKernel*) kern)->set_mkl_stepsize(mkl_stepsize);
00272     ((CWeightedDegreeStringKernel*) kern)->set_which_degree(single_degree);
00273 
00274     SG_FREE(weights);
00275     return kern;
00276 }
00277 
00278 CKernel* CGUIKernel::create_weighteddegreepositionstring(
00279     int32_t size, int32_t order, int32_t max_mismatch, int32_t length,
00280     int32_t center, float64_t step)
00281 {
00282     int32_t i=0;
00283     int32_t* shifts=SG_MALLOC(int32_t, length);
00284 
00285     for (i=center; i<length; i++)
00286         shifts[i]=(int32_t) floor(((float64_t) (i-center))/step);
00287 
00288     for (i=center-1; i>=0; i--)
00289         shifts[i]=(int32_t) floor(((float64_t) (center-i))/step);
00290 
00291     for (i=0; i<length; i++)
00292     {
00293         if (shifts[i]>length)
00294             shifts[i]=length;
00295     }
00296 
00297     for (i=0; i<length; i++)
00298         SG_INFO( "shift[%i]=%i\n", i, shifts[i]);
00299 
00300     float64_t* weights=get_weights(order, max_mismatch);
00301 
00302     CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length);
00303     if (!kern)
00304         SG_ERROR("Couldn't create WeightedDegreePositionStringKernel with size %d, order %d, max_mismatch %d, length %d, center %d, step %f.\n", size, order, max_mismatch, length, center, step);
00305     else
00306         SG_DEBUG("created WeightedDegreePositionStringKernel with size %d, order %d, max_mismatch %d, length %d, center %d, step %f.\n", kern, size, order, max_mismatch, length, center, step);
00307 
00308     SG_FREE(weights);
00309     SG_FREE(shifts);
00310     return kern;
00311 }
00312 
00313 CKernel* CGUIKernel::create_weighteddegreepositionstring3(
00314     int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts,
00315     int32_t length, int32_t mkl_stepsize, float64_t* position_weights)
00316 {
00317     float64_t* weights=get_weights(order, max_mismatch);
00318 
00319     CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length, mkl_stepsize);
00320     kern->set_normalizer(new CIdentityKernelNormalizer());
00321 
00322     SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d and position_weights (MKL stepsize: %d).\n", kern, size, order, max_mismatch, length, mkl_stepsize);
00323 
00324     if (!position_weights)
00325     {
00326         position_weights=SG_MALLOC(float64_t, length);
00327         for (int32_t i=0; i<length; i++)
00328             position_weights[i]=1.0/length;
00329     }
00330     ((CWeightedDegreePositionStringKernel*) kern)->
00331         set_position_weights(SGVector<float64_t>(position_weights, length));
00332 
00333     SG_FREE(weights);
00334     return kern;
00335 }
00336 
00337 CKernel* CGUIKernel::create_weighteddegreepositionstring2(
00338     int32_t size, int32_t order, int32_t max_mismatch, int32_t* shifts,
00339     int32_t length, bool use_normalization)
00340 {
00341     float64_t* weights=get_weights(order, max_mismatch);
00342 
00343     CKernel* kern=new CWeightedDegreePositionStringKernel(size, weights, order, max_mismatch, shifts, length);
00344     if (!use_normalization)
00345         kern->set_normalizer(new CIdentityKernelNormalizer());
00346 
00347 
00348     SG_DEBUG("created WeightedDegreePositionStringKernel (%p) with size %d, order %d, max_mismatch %d, length %d, use_normalization %d.\n", kern, size, order, max_mismatch, length, use_normalization);
00349 
00350     SG_FREE(weights);
00351     return kern;
00352 }
00353 
00354 float64_t* CGUIKernel::get_weights(int32_t order, int32_t max_mismatch)
00355 {
00356     float64_t *weights=SG_MALLOC(float64_t, order*(1+max_mismatch));
00357     float64_t sum=0;
00358     int32_t i=0;
00359 
00360     for (i=0; i<order; i++)
00361     {
00362         weights[i]=order-i;
00363         sum+=weights[i];
00364     }
00365     for (i=0; i<order; i++)
00366         weights[i]/=sum;
00367     
00368     for (i=0; i<order; i++)
00369     {
00370         for (int32_t j=1; j<=max_mismatch; j++)
00371         {
00372             if (j<i+1)
00373             {
00374                 int32_t nk=CMath::nchoosek(i+1, j);
00375                 weights[i+j*order]=weights[i]/(nk*CMath::pow(3, j));
00376             }
00377             else
00378                 weights[i+j*order]=0;
00379         }
00380     }
00381 
00382     return weights;
00383 }
00384 
00385 CKernel* CGUIKernel::create_weighteddegreerbf(int32_t size, int32_t degree, int32_t nof_properties, float64_t width)
00386 {
00387     CKernel* kern=new CWeightedDegreeRBFKernel(size, width, degree, nof_properties);
00388     if (!kern)
00389         SG_ERROR("Couldn't create WeightedDegreeRBFKernel with size %d, width %f, degree %d, nof_properties %d.\n", size, width, degree, nof_properties);
00390     else
00391         SG_DEBUG("created WeightedDegreeRBFKernel (%p) with size %d, width %f, degree %d, nof_properties %d.\n", kern, size, width, degree, nof_properties);
00392 
00393     return kern;
00394 }
00395 
00396 CKernel* CGUIKernel::create_spectrummismatchrbf(int32_t size, float64_t* AA_matrix, int32_t nr, int32_t nc, int32_t max_mismatch, int32_t degree, float64_t width)
00397 {
00398 
00399   CKernel* kern = new CSpectrumMismatchRBFKernel(size, AA_matrix, nr, nc, degree, max_mismatch, width);
00400     if (!kern)
00401         SG_ERROR("Couldn't create SpectrumMismatchRBFKernel with size %d, width %f, degree %d, max_mismatch %d.\n", size, width, degree, max_mismatch);
00402     else
00403         SG_DEBUG("created SpectrumMismatchRBFKernel (%p) with size %d, width %f, degree %d, max_mismatch %d.\n", kern, size, width, degree, max_mismatch);
00404 
00405     return kern;
00406 
00407 }
00408 
00409 
00410 CKernel* CGUIKernel::create_localalignmentstring(int32_t size)
00411 {
00412     CKernel* kern=new CLocalAlignmentStringKernel(size);
00413     if (!kern)
00414         SG_ERROR("Couldn't create LocalAlignmentStringKernel with size %d.\n", size);
00415     else
00416         SG_DEBUG("created LocalAlignmentStringKernel (%p) with size %d.\n", kern, size);
00417 
00418     return kern;
00419 }
00420 
00421 CKernel* CGUIKernel::create_fixeddegreestring(int32_t size, int32_t d)
00422 {
00423     CKernel* kern=new CFixedDegreeStringKernel(size, d);
00424     if (!kern)
00425         SG_ERROR("Couldn't create FixedDegreeStringKernel with size %d and d %d.\n", size, d);
00426     else
00427         SG_DEBUG("created FixedDegreeStringKernel (%p) with size %d and d %d.\n", kern, size, d);
00428 
00429     return kern;
00430 }
00431 
00432 CKernel* CGUIKernel::create_chi2(int32_t size, float64_t width)
00433 {
00434     CKernel* kern=new CChi2Kernel(size, width);
00435     if (!kern)
00436         SG_ERROR("Couldn't create Chi2Kernel with size %d and width %f.\n", size, width);
00437     else
00438         SG_DEBUG("created Chi2Kernel (%p) with size %d and width %f.\n", kern, size, width);
00439 
00440     return kern;
00441 }
00442 
00443 CKernel* CGUIKernel::create_commstring(
00444     int32_t size, bool use_sign, char* norm_str, EKernelType ktype)
00445 {
00446     CKernel* kern=NULL;
00447 
00448     if (!norm_str)
00449         norm_str= (char*) "FULL";
00450 
00451     if (ktype==K_COMMULONGSTRING)
00452         kern=new CCommUlongStringKernel(size, use_sign);
00453     else if (ktype==K_COMMWORDSTRING)
00454         kern=new CCommWordStringKernel(size, use_sign);
00455     else if (ktype==K_WEIGHTEDCOMMWORDSTRING)
00456         kern=new CWeightedCommWordStringKernel(size, use_sign);
00457 
00458     SG_DEBUG("created WeightedCommWord/CommWord/CommUlongStringKernel (%p) with size %d, use_sign  %d norm_str %s.\n", kern, size, use_sign, norm_str);
00459 
00460 
00461     if (strncmp(norm_str, "NO", 2)==0)
00462     {
00463         kern->set_normalizer(new CIdentityKernelNormalizer());
00464     }
00465     else if (strncmp(norm_str, "FULL", 4)==0)
00466     {
00467         //nop, as this one is default
00468     }
00469     else
00470         SG_ERROR("Unsupported Normalizer requested, supports only FULL and NO\n");
00471 
00472     return kern;
00473 }
00474 
00475 CKernel* CGUIKernel::create_matchwordstring(
00476     int32_t size, int32_t d, bool normalize)
00477 {
00478     CKernel* kern=new CMatchWordStringKernel(size, d);
00479     SG_DEBUG("created MatchWordStringKernel (%p) with size %d and d %d.\n", kern, size, d);
00480     if (!normalize)
00481         kern->set_normalizer(new CIdentityKernelNormalizer());
00482 
00483     return kern;
00484 }
00485 
00486 CKernel* CGUIKernel::create_polymatchstring(
00487     int32_t size, int32_t degree, bool inhomogene, bool normalize)
00488 {
00489     CKernel* kern=new CPolyMatchStringKernel(size, degree, inhomogene);
00490     SG_DEBUG("created PolyMatchStringKernel (%p) with size %d, degree %d, inhomogene %d normalize %d.\n", kern, size, degree, inhomogene, normalize);
00491     if (!normalize)
00492         kern->set_normalizer(new CIdentityKernelNormalizer());
00493 
00494     return kern;
00495 }
00496 
00497 CKernel* CGUIKernel::create_polymatchwordstring(
00498     int32_t size, int32_t degree, bool inhomogene, bool normalize)
00499 {
00500     CKernel* kern=new CPolyMatchWordStringKernel(size, degree, inhomogene);
00501     SG_DEBUG("created PolyMatchWordStringKernel (%p) with size %d, degree %d, inhomogene %d, normalize %d.\n", kern, size, degree, inhomogene, normalize);
00502     if (!normalize)
00503         kern->set_normalizer(new CIdentityKernelNormalizer());
00504 
00505     return kern;
00506 }
00507 
00508 CKernel* CGUIKernel::create_salzbergword(int32_t size)
00509 {
00510     SG_INFO("Getting estimator.\n");
00511     CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator();
00512     if (!estimator)
00513         SG_ERROR("No estimator set.\n");
00514 
00515     CKernel* kern=new CSalzbergWordStringKernel(size, estimator);
00516     if (!kern)
00517         SG_ERROR("Couldn't create SalzbergWordString with size %d.\n", size);
00518     else
00519         SG_DEBUG("created SalzbergWordString (%p) with size %d.\n", kern, size);
00520 
00521 /*
00522     // prior stuff
00523     SG_INFO("Getting labels.\n");
00524     CLabels* train_labels=ui->ui_labels->get_train_labels();
00525     if (!train_labels)
00526     {
00527         SG_INFO("Assign train labels first!\n");
00528         return NULL;
00529     }
00530     ((CSalzbergWordStringKernel *) kern)->set_prior_probs_from_labels(train_labels);
00531 */
00532 
00533     return kern;
00534 }
00535 
00536 CKernel* CGUIKernel::create_histogramword(int32_t size)
00537 {
00538     SG_INFO("Getting estimator.\n");
00539     CPluginEstimate* estimator=ui->ui_pluginestimate->get_estimator();
00540     if (!estimator)
00541         SG_ERROR("No estimator set.\n");
00542 
00543     CKernel* kern=new CHistogramWordStringKernel(size, estimator);
00544     if (!kern)
00545         SG_ERROR("Couldn't create HistogramWordString with size %d.\n", size);
00546     else
00547         SG_DEBUG("created HistogramWordString (%p) with size %d.\n", kern, size);
00548 
00549     return kern;
00550 }
00551 
00552 CKernel* CGUIKernel::create_linearbyte(int32_t size, float64_t scale)
00553 {
00554     size=0;
00555     CKernel* kern=new CLinearKernel();
00556     kern->set_normalizer(new CAvgDiagKernelNormalizer(scale));
00557     SG_DEBUG("created LinearByteKernel (%p) with size %d and scale %f.\n", kern, size, scale);
00558 
00559     return kern;
00560 }
00561 
00562 CKernel* CGUIKernel::create_linearword(int32_t size, float64_t scale)
00563 {
00564     size=0;
00565     CKernel* kern=new CLinearKernel();
00566     kern->set_normalizer(new CAvgDiagKernelNormalizer(scale));
00567     SG_DEBUG("created LinearWordKernel (%p) with size %d and scale %f.\n", kern, size, scale);
00568 
00569     return kern;
00570 }
00571 
00572 CKernel* CGUIKernel::create_linearstring(int32_t size, float64_t scale)
00573 {
00574     size=0;
00575     CKernel* kern=NULL;
00576     kern=new CLinearStringKernel();
00577     kern->set_normalizer(new CAvgDiagKernelNormalizer(scale));
00578 
00579     SG_DEBUG("created LinearStringKernel (%p) with size %d and scale %f.\n", kern, size, scale);
00580 
00581     return kern;
00582 }
00583 
00584 CKernel* CGUIKernel::create_linear(int32_t size, float64_t scale)
00585 {
00586     size=0;
00587     CKernel* kern=new CLinearKernel();
00588     kern->set_normalizer(new CAvgDiagKernelNormalizer(scale));
00589 
00590     SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale);
00591 
00592     return kern;
00593 }
00594 
00595 CKernel* CGUIKernel::create_sparselinear(int32_t size, float64_t scale)
00596 {
00597     size=0;
00598     CKernel* kern=new CLinearKernel();
00599     kern->set_normalizer(new CAvgDiagKernelNormalizer(scale));
00600 
00601     SG_DEBUG("created LinearKernel (%p) with size %d and scale %f.\n", kern, size, scale);
00602 
00603     return kern;
00604 }
00605 
00606 CKernel* CGUIKernel::create_tppk(int32_t size, float64_t* km, int32_t rows, int32_t cols)
00607 {
00608     CCustomKernel* k=new CCustomKernel();
00609     k->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(km, rows, cols));
00610 
00611     CKernel* kern=new CTensorProductPairKernel(size, k);
00612 
00613     SG_DEBUG("created TPPK (%p) with size %d and km %p, rows %d, cols %d.\n", kern, size, km, rows, cols);
00614 
00615     return kern;
00616 }
00617 
00618 CKernel* CGUIKernel::create_distance(int32_t size, float64_t width)
00619 {
00620     CDistance* dist=ui->ui_distance->get_distance();
00621     if (!dist)
00622         SG_ERROR("No distance set for DistanceKernel.\n");
00623 
00624     CKernel* kern=new CDistanceKernel(size, width, dist);
00625     if (!kern)
00626         SG_ERROR("Couldn't create DistanceKernel with size %d and width %f.\n", size, width);
00627     else
00628         SG_DEBUG("created DistanceKernel (%p) with size %d and width %f.\n", kern, size, width);
00629 
00630     return kern;
00631 }
00632 
00633 CKernel* CGUIKernel::create_combined(
00634     int32_t size, bool append_subkernel_weights)
00635 {
00636     CKernel* kern=new CCombinedKernel(size, append_subkernel_weights);
00637     if (!kern)
00638         SG_ERROR("Couldn't create CombinedKernel with size %d and append_subkernel_weights %d.\n", size, append_subkernel_weights);
00639     else
00640         SG_DEBUG("created CombinedKernel (%p) with size %d and append_subkernel_weights %d.\n", kern, size, append_subkernel_weights);
00641 
00642     return kern;
00643 }
00644 
00645 bool CGUIKernel::set_normalization(char* normalization, float64_t c, float64_t r)
00646 {
00647     CKernel* k=kernel;
00648 
00649     if (k && k->get_kernel_type()==K_COMBINED)
00650         k=((CCombinedKernel*) kernel)->get_last_kernel();
00651 
00652     if (!k)
00653         SG_ERROR("No kernel available.\n");
00654 
00655     if (strncmp(normalization, "IDENTITY", 8)==0)
00656     {
00657         SG_INFO("Identity Normalization (==NO NORMALIZATION) selected\n");
00658         return k->set_normalizer(new CIdentityKernelNormalizer());
00659     }
00660     else if (strncmp(normalization,"AVGDIAG", 7)==0)
00661     {
00662         SG_INFO("Average Kernel Diagonal Normalization selected\n");
00663         return k->set_normalizer(new CAvgDiagKernelNormalizer(c));
00664     }
00665     else if (strncmp(normalization,"RIDGE", 5)==0)
00666     {
00667         SG_INFO("Ridge Kernel Normalization selected\n");
00668         return k->set_normalizer(new CRidgeKernelNormalizer(r, c));
00669     }
00670     else if (strncmp(normalization,"SQRTDIAG", 8)==0)
00671     {
00672         SG_INFO("Sqrt Diagonal Normalization selected\n");
00673         return k->set_normalizer(new CSqrtDiagKernelNormalizer());
00674     }
00675     else if (strncmp(normalization,"FIRSTELEMENT", 12)==0)
00676     {
00677         SG_INFO("First Element Normalization selected\n");
00678         return k->set_normalizer(new CFirstElementKernelNormalizer());
00679     }
00680     else if (strncmp(normalization,"VARIANCE", 8)==0)
00681     {
00682         SG_INFO("Variance Normalization selected\n");
00683         return k->set_normalizer(new CVarianceKernelNormalizer());
00684     }
00685     else if (strncmp(normalization,"SCATTER", 7)==0)
00686     {
00687         SG_INFO("Scatter Normalization selected\n");
00688         CLabels* train_labels=ui->ui_labels->get_train_labels();
00689         ASSERT(train_labels);
00690         return k->set_normalizer(new CScatterKernelNormalizer(c,r, train_labels));
00691     }
00692     else if (strncmp(normalization,"ZEROMEANCENTER", 13)==0)
00693     {
00694         SG_INFO("Zero Mean Center Normalization selected\n");
00695         return k->set_normalizer(new CZeroMeanCenterKernelNormalizer());
00696     }
00697     else
00698         SG_ERROR("Wrong kernel normalizer name.\n");
00699 
00700     SG_UNREF(k);
00701 
00702     return false;
00703 }
00704 
00705 bool CGUIKernel::set_kernel(CKernel* kern)
00706 {
00707     if (kern)
00708     {
00709         SG_DEBUG("deleting old kernel (%p).\n", kernel);
00710         SG_UNREF(kernel);
00711         SG_REF(kern);
00712         kernel=kern;
00713         SG_DEBUG("set new kernel (%p).\n", kern);
00714 
00715         return true;
00716     }
00717     else
00718         return false;
00719 }
00720 
00721 bool CGUIKernel::init_kernel_optimization()
00722 {
00723     CSVM* svm=(CSVM*) ui->ui_classifier->get_classifier();
00724     if (svm)
00725     {
00726         if (kernel->has_property(KP_LINADD))
00727         {
00728             int32_t num_sv=svm->get_num_support_vectors();
00729             int32_t* sv_idx=SG_MALLOC(int32_t, num_sv);
00730             float64_t* sv_weight=SG_MALLOC(float64_t, num_sv);
00731             
00732             for (int32_t i=0; i<num_sv; i++)
00733             {
00734                 sv_idx[i]=svm->get_support_vector(i);
00735                 sv_weight[i]=svm->get_alpha(i);
00736             }
00737 
00738             bool ret=kernel->init_optimization(num_sv, sv_idx, sv_weight);
00739 
00740             SG_FREE(sv_idx);
00741             SG_FREE(sv_weight);
00742 
00743             if (!ret)
00744                 SG_ERROR("Initialization of kernel optimization failed\n");
00745             return ret;
00746         }
00747     }
00748     else
00749         SG_ERROR("Create SVM first!\n");
00750 
00751     return true;
00752 }
00753 
00754 bool CGUIKernel::delete_kernel_optimization()
00755 {
00756     if (kernel && kernel->has_property(KP_LINADD) && kernel->get_is_initialized())
00757         kernel->delete_optimization();
00758 
00759     return true;
00760 }
00761 
00762 
00763 bool CGUIKernel::init_kernel(const char* target)
00764 {
00765     if (!kernel)
00766         SG_ERROR("No kernel available.\n");
00767 
00768     // no need to init custom kernel
00769     if (kernel->get_kernel_type() == K_CUSTOM || !target)
00770     {
00771         initialized=true;
00772         return true;
00773     }
00774 
00775     EFeatureClass k_fclass=kernel->get_feature_class();
00776     EFeatureType k_ftype=kernel->get_feature_type();
00777 
00778     if (!strncmp(target, "TRAIN", 5))
00779     {
00780         CFeatures* train=ui->ui_features->get_train_features();
00781 
00782         if (train)
00783         {
00784             EFeatureClass fclass=train->get_feature_class();
00785             EFeatureType ftype=train->get_feature_type();
00786             if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) &&
00787                 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY))
00788             
00789             {
00790                 SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train);
00791                 kernel->init(train, train);
00792                 initialized=true;
00793             }
00794             else
00795                 SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype);
00796         }
00797         else
00798             SG_DEBUG("Not initing kernel - no train features assigned.\n");
00799     }
00800     else if (!strncmp(target, "TEST", 4))
00801     {
00802         CFeatures* train=ui->ui_features->get_train_features();
00803         CFeatures* test=ui->ui_features->get_test_features();
00804         if (train && test)
00805         {
00806             EFeatureClass fclass=test->get_feature_class();
00807             EFeatureType ftype=test->get_feature_type();
00808             if ((k_fclass==fclass || k_fclass==C_ANY || fclass==C_ANY) &&
00809                 (k_ftype==ftype || k_ftype==F_ANY || ftype==F_ANY))
00810             
00811             {
00812                 if (!initialized)
00813                 {
00814                     EFeatureClass tr_fclass=train->get_feature_class();
00815                     EFeatureType tr_ftype=train->get_feature_type();
00816                     if ((k_fclass==tr_fclass || k_fclass==C_ANY || tr_fclass==C_ANY) &&
00817                             (k_ftype==tr_ftype || k_ftype==F_ANY || tr_ftype==F_ANY))
00818                     {
00819                         SG_INFO("Initialising kernel with TRAIN DATA, train: %p\n", train);
00820                         kernel->init(train, train);
00821                         initialized=true;
00822                     }
00823                     else
00824                         SG_ERROR("Kernel can not process this train feature type: %d %d.\n", fclass, ftype);
00825                 }
00826                 
00827                 SG_INFO("Initialising kernel with TEST DATA, train: %p test %p\n", train, test);
00828                 // lhs -> always train_features; rhs -> always test_features
00829                 kernel->init(train, test);
00830             }
00831             else
00832                 SG_ERROR("Kernel can not process this test feature type: %d %d.\n", fclass, ftype);
00833         }
00834         else
00835             SG_DEBUG("Not initing kernel - no train and test features assigned.\n");
00836     }
00837     else
00838         SG_ERROR("Unknown target %s.\n", target);
00839 
00840     return true;
00841 }
00842 
00843 bool CGUIKernel::save_kernel(char* filename)
00844 {
00845     if (kernel && initialized)
00846     {
00847         CAsciiFile* file=new CAsciiFile(filename);
00848         try
00849         {
00850             kernel->save(file);
00851         }
00852         catch (...)
00853         {
00854             SG_ERROR("Writing to file %s failed!\n", filename);
00855         }
00856 
00857         SG_UNREF(file);
00858         SG_INFO("Successfully written kernel to \"%s\" !\n", filename);
00859         return true;
00860     }
00861     else
00862         SG_ERROR("No kernel set / kernel not initialized!\n");
00863 
00864     return false;
00865 }
00866 
00867 bool CGUIKernel::add_kernel(CKernel* kern, float64_t weight)
00868 {
00869     if (!kern)
00870         SG_ERROR("Given kernel to add is invalid.\n");
00871 
00872     if (!kernel)
00873     {
00874         kernel= new CCombinedKernel(20, false);
00875         SG_REF(kernel);
00876     }
00877 
00878     if (kernel->get_kernel_type()!=K_COMBINED)
00879     {
00880         CKernel* first_elem=kernel;
00881         kernel= new CCombinedKernel(20, false);
00882         SG_REF(kernel);
00883         ((CCombinedKernel*) kernel)->append_kernel(first_elem);
00884     }
00885 
00886     if (!kernel)
00887         SG_ERROR("Combined kernel object could not be created.\n");
00888 
00889     kern->set_combined_kernel_weight(weight);
00890 
00891     bool success=((CCombinedKernel*) kernel)->append_kernel(kern);
00892 
00893     initialized=true;
00894     if (success)
00895         ((CCombinedKernel*) kernel)->list_kernels();
00896     else
00897         SG_ERROR("Adding of kernel failed.\n");
00898 
00899     return success;
00900 }
00901 
00902 
00903 bool CGUIKernel::del_last_kernel()
00904 {
00905     if (!kernel)
00906         SG_ERROR("No kernel available.\n");
00907 
00908     if (kernel->get_kernel_type()!=K_COMBINED)
00909         SG_ERROR("Need a combined kernel for deleting the last kernel in it.\n");
00910 
00911     CKernel* last=((CCombinedKernel*) kernel)->get_last_kernel();
00912     if (last)
00913         return ((CCombinedKernel*) kernel)->delete_kernel();
00914     else
00915         SG_ERROR("No kernel available to delete.\n");
00916 
00917     return false;
00918 }
00919 
00920 bool CGUIKernel::clean_kernel()
00921 {
00922     SG_UNREF(kernel);
00923     kernel=NULL;
00924     return true;
00925 }
00926 
#ifdef USE_SVMLIGHT
/** Forward a cache resize request to the current kernel.
 *
 * Only compiled when USE_SVMLIGHT is defined.
 *
 * @param size new cache size, passed unchanged to the kernel
 * @return always true; a missing kernel is reported via SG_ERROR
 */
bool CGUIKernel::resize_kernel_cache(int32_t size)
{
    if (kernel==NULL)
        SG_ERROR("No kernel available.\n");

    kernel->resize_kernel_cache(size);

    return true;
}
#endif //USE_SVMLIGHT
00937 
00938 bool CGUIKernel::set_optimization_type(char* opt_type)
00939 {
00940     EOptimizationType opt=SLOWBUTMEMEFFICIENT;
00941     if (!kernel)
00942         SG_ERROR("No kernel available.\n");
00943 
00944     if (strncmp(opt_type, "FASTBUTMEMHUNGRY", 16)==0)
00945     {
00946         SG_INFO("FAST METHOD selected\n");
00947         opt=FASTBUTMEMHUNGRY;
00948         kernel->set_optimization_type(opt);
00949 
00950         return true;
00951     }
00952     else if (strncmp(opt_type,"SLOWBUTMEMEFFICIENT", 19)==0)
00953     {
00954         SG_INFO("MEMORY EFFICIENT METHOD selected\n");
00955         opt=SLOWBUTMEMEFFICIENT;
00956         kernel->set_optimization_type(opt);
00957 
00958         return true;
00959     }
00960     else
00961         SG_ERROR("Wrong kernel optimization type.\n");
00962 
00963     return false;
00964 }
00965 
00966 bool CGUIKernel::precompute_subkernels()
00967 {
00968     if (!kernel)
00969         SG_ERROR("No kernel available.\n");
00970 
00971     if (kernel->get_kernel_type()!=K_COMBINED)
00972         SG_ERROR("Not a combined kernel.\n");
00973 
00974     return ((CCombinedKernel*) kernel)->precompute_subkernels();
00975 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation