WeightedDegreePositionStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 #include "lib/Signal.h"
00015 #include "lib/Trie.h"
00016 #include "base/Parallel.h"
00017 
00018 #include "kernel/WeightedDegreePositionStringKernel.h"
00019 #include "kernel/SqrtDiagKernelNormalizer.h"
00020 #include "features/Features.h"
00021 #include "features/StringFeatures.h"
00022 
00023 #include "classifier/svm/SVM.h"
00024 
00025 #ifndef WIN32
00026 #include <pthread.h>
00027 #endif
00028 
00029 using namespace shogun;
00030 
00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X))
00032 
00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00034 template <class Trie> struct S_THREAD_PARAM 
00035 {
00036     int32_t* vec;
00037     float64_t* result;
00038     float64_t* weights;
00039     CWeightedDegreePositionStringKernel* kernel;
00040     CTrie<Trie>* tries;
00041     float64_t factor;
00042     int32_t j;
00043     int32_t start;
00044     int32_t end;
00045     int32_t length;
00046     int32_t max_shift;
00047     int32_t* shift;
00048     int32_t* vec_idx;
00049 };
00050 #endif // DOXYGEN_SHOULD_SKIP_THIS
00051 
00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00053     void)
00054 : CStringKernel<char>()
00055 {
00056     init();
00057 }
00058 
00059 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00060     int32_t size, int32_t d, int32_t mm, int32_t mkls)
00061 : CStringKernel<char>(size)
00062 {
00063     init();
00064 
00065     mkl_stepsize=mkls;
00066     degree=d;
00067     max_mismatch=mm;
00068 
00069     tries=CTrie<DNATrie>(d);
00070     poim_tries=CTrie<POIMTrie>(d);
00071 
00072     set_wd_weights();
00073     ASSERT(weights);
00074 }
00075 
00076 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00077     int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl,
00078     int32_t mkls)
00079 : CStringKernel<char>(size)
00080 {
00081     init();
00082 
00083     mkl_stepsize=mkls;
00084     degree=d;
00085     max_mismatch=mm;
00086 
00087     tries=CTrie<DNATrie>(d);
00088     poim_tries=CTrie<POIMTrie>(d);
00089 
00090     weights=new float64_t[d*(1+max_mismatch)];
00091     weights_degree=degree;
00092     weights_length=(1+max_mismatch);
00093 
00094     for (int32_t i=0; i<d*(1+max_mismatch); i++)
00095         weights[i]=w[i];
00096 
00097     set_shifts(s, sl);
00098 }
00099 
00100 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00101     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00102 : CStringKernel<char>()
00103 {
00104     init();
00105 
00106     mkl_stepsize=1;
00107     degree=d;
00108 
00109     tries=CTrie<DNATrie>(d);
00110     poim_tries=CTrie<POIMTrie>(d);
00111 
00112     set_wd_weights();
00113     ASSERT(weights);
00114 
00115     init(l, r);
00116 }
00117 
00118 
00119 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel()
00120 {
00121     cleanup();
00122     cleanup_POIM2();
00123 
00124     delete[] shift;
00125     shift=NULL;
00126 
00127     delete[] weights;
00128     weights=NULL;
00129     weights_degree=0;
00130     weights_length=0;
00131 
00132     delete[] block_weights;
00133     block_weights=NULL;
00134 
00135     delete[] position_weights;
00136     position_weights=NULL;
00137 
00138     delete[] position_weights_lhs;
00139     position_weights_lhs=NULL;
00140 
00141     delete[] position_weights_rhs;
00142     position_weights_rhs=NULL;
00143 
00144     delete[] weights_buffer;
00145     weights_buffer=NULL;
00146 }
00147 
00148 void CWeightedDegreePositionStringKernel::remove_lhs()
00149 {
00150     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00151     delete_optimization();
00152 
00153     tries.destroy();
00154     poim_tries.destroy();
00155 
00156     CKernel::remove_lhs();
00157 }
00158 
00159 void CWeightedDegreePositionStringKernel::create_empty_tries()
00160 {
00161     ASSERT(lhs);
00162     seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length();
00163 
00164     if (opt_type==SLOWBUTMEMEFFICIENT)
00165     {
00166         tries.create(seq_length, true); 
00167         poim_tries.create(seq_length, true); 
00168     }
00169     else if (opt_type==FASTBUTMEMHUNGRY)
00170     {
00171         tries.create(seq_length, false);  // still buggy
00172         poim_tries.create(seq_length, false);  // still buggy
00173     }
00174     else
00175         SG_ERROR( "unknown optimization type\n");
00176 }
00177 
00178 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r)
00179 {
00180     int32_t lhs_changed = (lhs!=l) ;
00181     int32_t rhs_changed = (rhs!=r) ;
00182 
00183     CStringKernel<char>::init(l,r);
00184 
00185     SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ;
00186     SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ;
00187 
00188     CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00189     CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00190 
00191     /* set shift */
00192     if (shift_len==0) {
00193         shift_len=sf_l->get_vector_length(0);
00194         int32_t *shifts=new int32_t[shift_len];
00195         for (int32_t i=0; i<shift_len; i++) {
00196             shifts[i]=1;
00197         }
00198         set_shifts(shifts, shift_len);
00199         delete[] shifts;
00200     }
00201 
00202 
00203     int32_t len=sf_l->get_max_vector_length();
00204     if (lhs_changed && !sf_l->have_same_length(len))
00205         SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00206 
00207     if (rhs_changed && !sf_r->have_same_length(len))
00208         SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00209 
00210     SG_UNREF(alphabet);
00211     alphabet= sf_l->get_alphabet();
00212     CAlphabet* ralphabet=sf_r->get_alphabet();
00213 
00214     if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00215         properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00216 
00217     ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00218     SG_UNREF(ralphabet);
00219 
00220     //whenever init is called also init tries and block weights
00221     create_empty_tries();
00222     init_block_weights();
00223 
00224     return init_normalizer();
00225 }
00226 
00227 void CWeightedDegreePositionStringKernel::cleanup()
00228 {
00229     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00230     delete_optimization();
00231 
00232     delete[] block_weights;
00233     block_weights=NULL;
00234 
00235     tries.destroy();
00236     poim_tries.destroy();
00237 
00238     seq_length = 0;
00239     tree_initialized = false;
00240 
00241     SG_UNREF(alphabet);
00242     alphabet=NULL;
00243 
00244     CKernel::cleanup();
00245 }
00246 
00247 bool CWeightedDegreePositionStringKernel::init_optimization(
00248     int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num,
00249     int32_t upto_tree)
00250 {
00251     ASSERT(position_weights_lhs==NULL);
00252     ASSERT(position_weights_rhs==NULL);
00253 
00254     if (upto_tree<0)
00255         upto_tree=tree_num;
00256 
00257     if (max_mismatch!=0)
00258     {
00259         SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n");
00260         return false ;
00261     }
00262 
00263     if (tree_num<0)
00264         SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00265 
00266     delete_optimization();
00267 
00268     if (tree_num<0)
00269         SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ;
00270 
00271     for (int32_t i=0; i<p_count; i++)
00272     {
00273         if (tree_num<0)
00274         {
00275             if ( (i % (p_count/10+1)) == 0)
00276                 SG_PROGRESS(i,0,p_count);
00277             add_example_to_tree(IDX[i], alphas[i]);
00278         }
00279         else
00280         {
00281             for (int32_t t=tree_num; t<=upto_tree; t++)
00282                 add_example_to_single_tree(IDX[i], alphas[i], t);
00283         }
00284     }
00285 
00286     if (tree_num<0)
00287         SG_DONE();
00288 
00289     set_is_initialized(true) ;
00290     return true ;
00291 }
00292 
00293 bool CWeightedDegreePositionStringKernel::delete_optimization() 
00294 { 
00295     if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 
00296     {
00297         tries.set_use_compact_terminal_nodes(false) ;
00298         SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00299     }
00300 
00301     if (get_is_initialized())
00302     {
00303         if (opt_type==SLOWBUTMEMEFFICIENT)
00304             tries.delete_trees(true); 
00305         else if (opt_type==FASTBUTMEMHUNGRY)
00306             tries.delete_trees(false);  // still buggy
00307         else {
00308             SG_ERROR( "unknown optimization type\n");
00309         }
00310         set_is_initialized(false);
00311 
00312         return true;
00313     }
00314 
00315     return false;
00316 }
00317 
00318 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch(
00319     char* avec, int32_t alen, char* bvec, int32_t blen)
00320 {
00321     float64_t* max_shift_vec= new float64_t[max_shift];
00322     float64_t sum0=0 ;
00323     for (int32_t i=0; i<max_shift; i++)
00324         max_shift_vec[i]=0 ;
00325     
00326     // no shift
00327     for (int32_t i=0; i<alen; i++)
00328     {
00329         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00330             continue ;
00331         
00332         int32_t mismatches=0;
00333         float64_t sumi = 0.0 ;
00334         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00335         {
00336             if (avec[i+j]!=bvec[i+j])
00337             {
00338                 mismatches++ ;
00339                 if (mismatches>max_mismatch)
00340                     break ;
00341             } ;
00342             sumi += weights[j+degree*mismatches];
00343         }
00344         if (position_weights!=NULL)
00345             sum0 += position_weights[i]*sumi ;
00346         else
00347             sum0 += sumi ;
00348     } ;
00349     
00350     for (int32_t i=0; i<alen; i++)
00351     {
00352         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00353         {
00354             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00355                 continue ;
00356             
00357             float64_t sumi1 = 0.0 ;
00358             // shift in sequence a
00359             int32_t mismatches=0;
00360             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00361             {
00362                 if (avec[i+j+k]!=bvec[i+j])
00363                 {
00364                     mismatches++ ;
00365                     if (mismatches>max_mismatch)
00366                         break ;
00367                 } ;
00368                 sumi1 += weights[j+degree*mismatches];
00369             }
00370             float64_t sumi2 = 0.0 ;
00371             // shift in sequence b
00372             mismatches=0;
00373             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00374             {
00375                 if (avec[i+j]!=bvec[i+j+k])
00376                 {
00377                     mismatches++ ;
00378                     if (mismatches>max_mismatch)
00379                         break ;
00380                 } ;
00381                 sumi2 += weights[j+degree*mismatches];
00382             }
00383             if (position_weights!=NULL)
00384                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00385             else
00386                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00387         } ;
00388     }
00389     
00390     float64_t result = sum0 ;
00391     for (int32_t i=0; i<max_shift; i++)
00392         result += max_shift_vec[i]/(2*(i+1)) ;
00393     
00394     delete[] max_shift_vec;
00395     return result ;
00396 }
00397 
00398 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch(
00399     char* avec, int32_t alen, char* bvec, int32_t blen) 
00400 {
00401     float64_t* max_shift_vec = new float64_t[max_shift];
00402     float64_t sum0=0 ;
00403     for (int32_t i=0; i<max_shift; i++)
00404         max_shift_vec[i]=0 ;
00405 
00406     // no shift
00407     for (int32_t i=0; i<alen; i++)
00408     {
00409         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00410             continue ;
00411 
00412         float64_t sumi = 0.0 ;
00413         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00414         {
00415             if (avec[i+j]!=bvec[i+j])
00416                 break ;
00417             sumi += weights[j];
00418         }
00419         if (position_weights!=NULL)
00420             sum0 += position_weights[i]*sumi ;
00421         else
00422             sum0 += sumi ;
00423     } ;
00424 
00425     for (int32_t i=0; i<alen; i++)
00426     {
00427         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00428         {
00429             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00430                 continue ;
00431 
00432             float64_t sumi1 = 0.0 ;
00433             // shift in sequence a
00434             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00435             {
00436                 if (avec[i+j+k]!=bvec[i+j])
00437                     break ;
00438                 sumi1 += weights[j];
00439             }
00440             float64_t sumi2 = 0.0 ;
00441             // shift in sequence b
00442             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00443             {
00444                 if (avec[i+j]!=bvec[i+j+k])
00445                     break ;
00446                 sumi2 += weights[j];
00447             }
00448             if (position_weights!=NULL)
00449                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00450             else
00451                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00452         } ;
00453     }
00454 
00455     float64_t result = sum0 ;
00456     for (int32_t i=0; i<max_shift; i++)
00457         result += max_shift_vec[i]/(2*(i+1)) ;
00458 
00459     delete[] max_shift_vec;
00460 
00461     return result ;
00462 }
00463 
00464 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix(
00465     char* avec, int32_t alen, char* bvec, int32_t blen) 
00466 {
00467     float64_t* max_shift_vec = new float64_t[max_shift];
00468     float64_t sum0=0 ;
00469     for (int32_t i=0; i<max_shift; i++)
00470         max_shift_vec[i]=0 ;
00471 
00472     // no shift
00473     for (int32_t i=0; i<alen; i++)
00474     {
00475         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00476             continue ;
00477         float64_t sumi = 0.0 ;
00478         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00479         {
00480             if (avec[i+j]!=bvec[i+j])
00481                 break ;
00482             sumi += weights[i*degree+j];
00483         }
00484         if (position_weights!=NULL)
00485             sum0 += position_weights[i]*sumi ;
00486         else
00487             sum0 += sumi ;
00488     } ;
00489 
00490     for (int32_t i=0; i<alen; i++)
00491     {
00492         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00493         {
00494             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00495                 continue ;
00496 
00497             float64_t sumi1 = 0.0 ;
00498             // shift in sequence a
00499             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00500             {
00501                 if (avec[i+j+k]!=bvec[i+j])
00502                     break ;
00503                 sumi1 += weights[i*degree+j];
00504             }
00505             float64_t sumi2 = 0.0 ;
00506             // shift in sequence b
00507             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00508             {
00509                 if (avec[i+j]!=bvec[i+j+k])
00510                     break ;
00511                 sumi2 += weights[i*degree+j];
00512             }
00513             if (position_weights!=NULL)
00514                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00515             else
00516                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00517         } ;
00518     }
00519 
00520     float64_t result = sum0 ;
00521     for (int32_t i=0; i<max_shift; i++)
00522         result += max_shift_vec[i]/(2*(i+1)) ;
00523 
00524     delete[] max_shift_vec;
00525     return result ;
00526 }
00527 
00528 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights(
00529     char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec,
00530     float64_t* pos_weights_rhs, int32_t blen)
00531 {
00532     float64_t* max_shift_vec = new float64_t[max_shift];
00533     float64_t sum0=0 ;
00534     for (int32_t i=0; i<max_shift; i++)
00535         max_shift_vec[i]=0 ;
00536 
00537     // no shift
00538     for (int32_t i=0; i<alen; i++)
00539     {
00540         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00541             continue ;
00542 
00543         float64_t sumi = 0.0 ;
00544         float64_t posweight_lhs = 0.0 ;
00545         float64_t posweight_rhs = 0.0 ;
00546         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00547         {
00548             posweight_lhs += pos_weights_lhs[i+j] ;
00549             posweight_rhs += pos_weights_rhs[i+j] ;
00550             
00551             if (avec[i+j]!=bvec[i+j])
00552                 break ;
00553             sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00554         }
00555         if (position_weights!=NULL)
00556             sum0 += position_weights[i]*sumi ;
00557         else
00558             sum0 += sumi ;
00559     } ;
00560 
00561     for (int32_t i=0; i<alen; i++)
00562     {
00563         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00564         {
00565             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00566                 continue ;
00567 
00568             // shift in sequence a  
00569             float64_t sumi1 = 0.0 ;
00570             float64_t posweight_lhs = 0.0 ;
00571             float64_t posweight_rhs = 0.0 ;
00572             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00573             {
00574                 posweight_lhs += pos_weights_lhs[i+j+k] ;
00575                 posweight_rhs += pos_weights_rhs[i+j] ;
00576                 if (avec[i+j+k]!=bvec[i+j])
00577                     break ;
00578                 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00579             }
00580             // shift in sequence b
00581             float64_t sumi2 = 0.0 ;
00582             posweight_lhs = 0.0 ;
00583             posweight_rhs = 0.0 ;
00584             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00585             {
00586                 posweight_lhs += pos_weights_lhs[i+j] ;
00587                 posweight_rhs += pos_weights_rhs[i+j+k] ;
00588                 if (avec[i+j]!=bvec[i+j+k])
00589                     break ;
00590                 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00591             }
00592             if (position_weights!=NULL)
00593                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00594             else
00595                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00596         } ;
00597     }
00598 
00599     float64_t result = sum0 ;
00600     for (int32_t i=0; i<max_shift; i++)
00601         result += max_shift_vec[i]/(2*(i+1)) ;
00602 
00603     delete[] max_shift_vec;
00604     return result ;
00605 }
00606 
00607 
00608 float64_t CWeightedDegreePositionStringKernel::compute(
00609     int32_t idx_a, int32_t idx_b)
00610 {
00611     int32_t alen, blen;
00612     bool free_avec, free_bvec;
00613 
00614     char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00615     char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00616     // can only deal with strings of same length
00617     ASSERT(alen==blen);
00618     ASSERT(shift_len==alen);
00619 
00620     float64_t result = 0 ;
00621     if (position_weights_lhs!=NULL || position_weights_rhs!=NULL)
00622     {
00623         ASSERT(max_mismatch==0);
00624         float64_t* position_weights_rhs_ = position_weights_rhs ;
00625         if (lhs==rhs)
00626             position_weights_rhs_ = position_weights_lhs ;
00627         
00628         result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ;
00629     }
00630     else if (max_mismatch > 0)
00631         result = compute_with_mismatch(avec, alen, bvec, blen) ;
00632     else if (length==0)
00633         result = compute_without_mismatch(avec, alen, bvec, blen) ;
00634     else
00635         result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ;
00636 
00637     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00638     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00639 
00640     return result ;
00641 }
00642 
00643 
00644 void CWeightedDegreePositionStringKernel::add_example_to_tree(
00645     int32_t idx, float64_t alpha)
00646 {
00647     ASSERT(position_weights_lhs==NULL);
00648     ASSERT(position_weights_rhs==NULL);
00649     ASSERT(alphabet);
00650     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00651 
00652     int32_t len=0;
00653     bool free_vec;
00654     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00655     ASSERT(max_mismatch==0);
00656     int32_t *vec = new int32_t[len] ;
00657 
00658     for (int32_t i=0; i<len; i++)
00659         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00660     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00661 
00662     if (opt_type==FASTBUTMEMHUNGRY)
00663     {
00664         //TRIES(set_use_compact_terminal_nodes(false)) ;
00665         ASSERT(!TRIES(get_use_compact_terminal_nodes()));
00666     }
00667 
00668     for (int32_t i=0; i<len; i++)
00669     {
00670         int32_t max_s=-1;
00671 
00672         if (opt_type==SLOWBUTMEMEFFICIENT)
00673             max_s=0;
00674         else if (opt_type==FASTBUTMEMHUNGRY)
00675             max_s=shift[i];
00676         else {
00677             SG_ERROR( "unknown optimization type\n");
00678         }
00679 
00680         for (int32_t s=max_s; s>=0; s--)
00681         {
00682             float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00683             TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ;
00684             if ((s==0) || (i+s>=len))
00685                 continue;
00686 
00687             TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ;
00688         }
00689     }
00690 
00691     delete[] vec ;
00692     tree_initialized=true ;
00693 }
00694 
00695 void CWeightedDegreePositionStringKernel::add_example_to_single_tree(
00696     int32_t idx, float64_t alpha, int32_t tree_num) 
00697 {
00698     ASSERT(position_weights_lhs==NULL);
00699     ASSERT(position_weights_rhs==NULL);
00700     ASSERT(alphabet);
00701     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00702 
00703     int32_t len=0;
00704     bool free_vec;
00705     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00706     ASSERT(max_mismatch==0);
00707     int32_t *vec=new int32_t[len];
00708     int32_t max_s=-1;
00709 
00710     if (opt_type==SLOWBUTMEMEFFICIENT)
00711         max_s=0;
00712     else if (opt_type==FASTBUTMEMHUNGRY)
00713     {
00714         ASSERT(!tries.get_use_compact_terminal_nodes());
00715         max_s=shift[tree_num];
00716     }
00717     else {
00718         SG_ERROR( "unknown optimization type\n");
00719     }
00720     for (int32_t i=CMath::max(0,tree_num-max_shift);
00721             i<CMath::min(len,tree_num+degree+max_shift); i++)
00722     {
00723         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00724     } 
00725     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00726 
00727     for (int32_t s=max_s; s>=0; s--)
00728     {
00729         float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00730         tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ;
00731     } 
00732 
00733     if (opt_type==FASTBUTMEMHUNGRY)
00734     {
00735         for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++)
00736         {
00737             int32_t s=tree_num-i;
00738             if ((i+s<len) && (s>=1) && (s<=shift[i]))
00739             {
00740                 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00741                 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 
00742             }
00743         }
00744     }
00745     delete[] vec ;
00746     tree_initialized=true ;
00747 }
00748 
00749 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx)
00750 {
00751     ASSERT(position_weights_lhs==NULL);
00752     ASSERT(position_weights_rhs==NULL);
00753     ASSERT(alphabet);
00754     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00755 
00756     float64_t sum=0;
00757     int32_t len=0;
00758     bool free_vec;
00759     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00760     ASSERT(max_mismatch==0);
00761     int32_t *vec=new int32_t[len];
00762 
00763     for (int32_t i=0; i<len; i++)
00764         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00765 
00766     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00767 
00768     for (int32_t i=0; i<len; i++)
00769         sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ;
00770 
00771     if (opt_type==SLOWBUTMEMEFFICIENT)
00772     {
00773         for (int32_t i=0; i<len; i++)
00774         {
00775             for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++)
00776             {
00777                 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ;
00778                 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ;
00779             }
00780         }
00781     }
00782 
00783     delete[] vec ;
00784 
00785     return normalizer->normalize_rhs(sum, idx);
00786 }
00787 
00788 void CWeightedDegreePositionStringKernel::compute_by_tree(
00789     int32_t idx, float64_t* LevelContrib)
00790 {
00791     ASSERT(position_weights_lhs==NULL);
00792     ASSERT(position_weights_rhs==NULL);
00793     ASSERT(alphabet);
00794     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00795 
00796     int32_t len=0;
00797     bool free_vec;
00798     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00799     ASSERT(max_mismatch==0);
00800     int32_t *vec=new int32_t[len];
00801 
00802     for (int32_t i=0; i<len; i++)
00803         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00804 
00805     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00806 
00807     for (int32_t i=0; i<len; i++)
00808     {
00809         tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00810                 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights,
00811                 (length!=0));
00812     }
00813 
00814     if (opt_type==SLOWBUTMEMEFFICIENT)
00815     {
00816         for (int32_t i=0; i<len; i++)
00817             for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++)
00818             {
00819                 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib,
00820                         normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize,
00821                         weights, (length!=0)) ;
00822                 tries.compute_by_tree_helper(vec, len, i+k, i, i+k,
00823                         LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx),
00824                         mkl_stepsize, weights, (length!=0)) ;
00825             }
00826     }
00827 
00828     delete[] vec ;
00829 }
00830 
00831 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights(
00832     int32_t &len)
00833 {
00834     return tries.compute_abs_weights(len);
00835 }
00836 
00837 bool CWeightedDegreePositionStringKernel::set_shifts(
00838     int32_t* shift_, int32_t shift_len_)
00839 {
00840     delete[] shift;
00841 
00842     shift_len = shift_len_ ;
00843     shift = new int32_t[shift_len] ;
00844 
00845     if (shift)
00846     {
00847         max_shift = 0 ;
00848 
00849         for (int32_t i=0; i<shift_len; i++)
00850         {
00851             shift[i] = shift_[i] ;
00852             max_shift = CMath::max(shift[i], max_shift);
00853         }
00854 
00855         ASSERT(max_shift>=0 && max_shift<=shift_len);
00856     }
00857     
00858     return false;
00859 }
00860 
00861 bool CWeightedDegreePositionStringKernel::set_wd_weights()
00862 {
00863     ASSERT(degree>0);
00864 
00865     delete[] weights;
00866     weights=new float64_t[degree];
00867     weights_degree=degree;
00868     weights_length=1;
00869 
00870     if (weights)
00871     {
00872         int32_t i;
00873         float64_t sum=0;
00874         for (i=0; i<degree; i++)
00875         {
00876             weights[i]=degree-i;
00877             sum+=weights[i];
00878         }
00879         for (i=0; i<degree; i++)
00880             weights[i]/=sum;
00881 
00882         for (i=0; i<degree; i++)
00883         {
00884             for (int32_t j=1; j<=max_mismatch; j++)
00885             {
00886                 if (j<i+1)
00887                 {
00888                     int32_t nk=CMath::nchoosek(i+1, j);
00889                     weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
00890                 }
00891                 else
00892                     weights[i+j*degree]= 0;
00893             }
00894         }
00895 
00896         return true;
00897     }
00898     else
00899         return false;
00900 }
00901 
00902 bool CWeightedDegreePositionStringKernel::set_weights(
00903     float64_t* ws, int32_t d, int32_t len)
00904 {
00905     if (d!=degree || len<0)
00906         SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
00907 
00908     degree=d;
00909     length=len;
00910 
00911     if (len <= 0)
00912         len=1;
00913 
00914     weights_degree=degree;
00915     weights_length=len+max_mismatch;
00916 
00917     SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length);
00918     int32_t num_weights=weights_degree*weights_length;
00919     delete[] weights;
00920     weights=new float64_t[num_weights];
00921 
00922     for (int32_t i=0; i<degree*len; i++)
00923         weights[i]=ws[i];
00924 
00925     return true;
00926 }
00927 
00928 bool CWeightedDegreePositionStringKernel::set_position_weights(
00929     float64_t* pws, int32_t len)
00930 {
00931     if (seq_length==0)
00932         seq_length=len;
00933 
00934     if (seq_length!=len)
00935     {
00936         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00937         return false;
00938     }
00939     delete[] position_weights;
00940     position_weights=new float64_t[len];
00941     position_weights_len=len;
00942     tries.set_position_weights(position_weights);
00943 
00944     if (position_weights)
00945     {
00946         for (int32_t i=0; i<len; i++)
00947             position_weights[i]=pws[i];
00948         return true;
00949     }
00950     else
00951         return false;
00952 }
00953 
00954 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num)
00955 {
00956     if (position_weights_rhs==position_weights_lhs)
00957         position_weights_rhs=NULL;
00958     else
00959         delete_position_weights_rhs();
00960 
00961     if (len==0)
00962     {
00963         return delete_position_weights_lhs();
00964     }
00965     
00966     if (seq_length!=len)
00967     {
00968         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00969         return false;
00970     }
00971     
00972     delete[] position_weights_lhs;
00973     position_weights_lhs=new float64_t[len*num];
00974     position_weights_lhs_len=len*num;
00975 
00976     for (int32_t i=0; i<len*num; i++)
00977         position_weights_lhs[i]=pws[i];
00978 
00979     return true;
00980 }
00981 
00982 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs(
00983     float64_t* pws, int32_t len, int32_t num)
00984 {
00985     if (len==0)
00986     {
00987         if (position_weights_rhs==position_weights_lhs)
00988         {
00989             position_weights_rhs=NULL;
00990             return true;
00991         }
00992         return delete_position_weights_rhs();
00993     }
00994 
00995     if (seq_length!=len)
00996     {
00997         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00998         return false;
00999     }
01000     
01001     delete[] position_weights_rhs;
01002     position_weights_rhs=new float64_t[len*num];
01003     position_weights_rhs_len=len*num;
01004 
01005     for (int32_t i=0; i<len*num; i++)
01006         position_weights_rhs[i]=pws[i];
01007 
01008     return true;
01009 }
01010 
01011 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd()
01012 {
01013     delete[] block_weights;
01014     block_weights=new float64_t[CMath::max(seq_length,degree)];
01015 
01016     if (block_weights)
01017     {
01018         int32_t k;
01019         float64_t d=degree; // use float to evade rounding errors below
01020 
01021         for (k=0; k<degree; k++)
01022             block_weights[k]=
01023                 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
01024         for (k=degree; k<seq_length; k++)
01025             block_weights[k]=(-d+3*k+4)/3;
01026     }
01027 
01028     return (block_weights!=NULL);
01029 }
01030 
01031 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external()
01032 {
01033     ASSERT(weights);
01034     delete[] block_weights;
01035     block_weights=new float64_t[CMath::max(seq_length,degree)];
01036 
01037     if (block_weights)
01038     {
01039         int32_t i=0;
01040         block_weights[0]=weights[0];
01041         for (i=1; i<CMath::max(seq_length,degree); i++)
01042             block_weights[i]=0;
01043 
01044         for (i=1; i<CMath::max(seq_length,degree); i++)
01045         {
01046             block_weights[i]=block_weights[i-1];
01047 
01048             float64_t contrib=0;
01049             for (int32_t j=0; j<CMath::min(degree,i+1); j++)
01050                 contrib+=weights[j];
01051 
01052             block_weights[i]+=contrib;
01053         }
01054     }
01055 
01056     return (block_weights!=NULL);
01057 }
01058 
01059 bool CWeightedDegreePositionStringKernel::init_block_weights_const()
01060 {
01061     delete[] block_weights;
01062     block_weights=new float64_t[seq_length];
01063 
01064     if (block_weights)
01065     {
01066         for (int32_t i=1; i<seq_length+1 ; i++)
01067             block_weights[i-1]=1.0/seq_length;
01068     }
01069 
01070     return (block_weights!=NULL);
01071 }
01072 
01073 bool CWeightedDegreePositionStringKernel::init_block_weights_linear()
01074 {
01075     delete[] block_weights;
01076     block_weights=new float64_t[seq_length];
01077 
01078     if (block_weights)
01079     {
01080         for (int32_t i=1; i<seq_length+1 ; i++)
01081             block_weights[i-1]=degree*i;
01082     }
01083 
01084     return (block_weights!=NULL);
01085 }
01086 
01087 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly()
01088 {
01089     delete[] block_weights;
01090     block_weights=new float64_t[seq_length];
01091 
01092     if (block_weights)
01093     {
01094         for (int32_t i=1; i<degree+1 ; i++)
01095             block_weights[i-1]=((float64_t) i)*i;
01096 
01097         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01098             block_weights[i-1]=i;
01099     }
01100 
01101     return (block_weights!=NULL);
01102 }
01103 
01104 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly()
01105 {
01106     delete[] block_weights;
01107     block_weights=new float64_t[seq_length];
01108 
01109     if (block_weights)
01110     {
01111         for (int32_t i=1; i<degree+1 ; i++)
01112             block_weights[i-1]=((float64_t) i)*i*i;
01113 
01114         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01115             block_weights[i-1]=i;
01116     }
01117 
01118     return (block_weights!=NULL);
01119 }
01120 
01121 bool CWeightedDegreePositionStringKernel::init_block_weights_exp()
01122 {
01123     delete[] block_weights;
01124     block_weights=new float64_t[seq_length];
01125 
01126     if (block_weights)
01127     {
01128         for (int32_t i=1; i<degree+1 ; i++)
01129             block_weights[i-1]=exp(((float64_t) i/10.0));
01130 
01131         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01132             block_weights[i-1]=i;
01133     }
01134 
01135     return (block_weights!=NULL);
01136 }
01137 
01138 bool CWeightedDegreePositionStringKernel::init_block_weights_log()
01139 {
01140     delete[] block_weights;
01141     block_weights=new float64_t[seq_length];
01142 
01143     if (block_weights)
01144     {
01145         for (int32_t i=1; i<degree+1 ; i++)
01146             block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2);
01147 
01148         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01149             block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
01150     }
01151 
01152     return (block_weights!=NULL);
01153 }
01154 
01155 bool CWeightedDegreePositionStringKernel::init_block_weights()
01156 {
01157     switch (type)
01158     {
01159         case E_WD:
01160             return init_block_weights_from_wd();
01161         case E_EXTERNAL:
01162             return init_block_weights_from_wd_external();
01163         case E_BLOCK_CONST:
01164             return init_block_weights_const();
01165         case E_BLOCK_LINEAR:
01166             return init_block_weights_linear();
01167         case E_BLOCK_SQPOLY:
01168             return init_block_weights_sqpoly();
01169         case E_BLOCK_CUBICPOLY:
01170             return init_block_weights_cubicpoly();
01171         case E_BLOCK_EXP:
01172             return init_block_weights_exp();
01173         case E_BLOCK_LOG:
01174             return init_block_weights_log();
01175     };
01176     return false;
01177 }
01178 
01179 
01180 
01181 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p)
01182 {
01183     S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p;
01184     int32_t j=params->j;
01185     CWeightedDegreePositionStringKernel* wd=params->kernel;
01186     CTrie<DNATrie>* tries=params->tries;
01187     float64_t* weights=params->weights;
01188     int32_t length=params->length;
01189     int32_t max_shift=params->max_shift;
01190     int32_t* vec=params->vec;
01191     float64_t* result=params->result;
01192     float64_t factor=params->factor;
01193     int32_t* shift=params->shift;
01194     int32_t* vec_idx=params->vec_idx;
01195 
01196     for (int32_t i=params->start; i<params->end; i++)
01197     {
01198         int32_t len=0;
01199         CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
01200         CAlphabet* alpha=wd->alphabet;
01201 
01202         bool free_vec;
01203         char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
01204         for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++)
01205             vec[k]=alpha->remap_to_bin(char_vec[k]);
01206         rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);
01207 
01208         SG_UNREF(rhs_feat);
01209 
01210         result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
01211 
01212         if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT)
01213         {
01214             for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++)
01215             {
01216                 int32_t s=j-q ;
01217                 if ((s>=1) && (s<=shift[q]) && (q+s<len))
01218                 {
01219                     result[i] +=
01220                         wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01221                                 len, q, q+s, q, weights, (length!=0)),
01222                                 vec_idx[i])/(2.0*s);
01223                 }
01224             }
01225 
01226             for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++)
01227             {
01228                 result[i] +=
01229                     wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01230                                 len, j+s, j, j+s, weights, (length!=0)),
01231                                 vec_idx[i])/(2.0*s);
01232             }
01233         }
01234     }
01235 
01236     return NULL;
01237 }
01238 
01239 void CWeightedDegreePositionStringKernel::compute_batch(
01240     int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
01241     int32_t* IDX, float64_t* alphas, float64_t factor)
01242 {
01243     ASSERT(alphabet);
01244     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01245     ASSERT(position_weights_lhs==NULL);
01246     ASSERT(position_weights_rhs==NULL);
01247     ASSERT(rhs);
01248     ASSERT(num_vec<=rhs->get_num_vectors());
01249     ASSERT(num_vec>0);
01250     ASSERT(vec_idx);
01251     ASSERT(result);
01252     create_empty_tries();
01253 
01254     int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01255     ASSERT(num_feat>0);
01256     int32_t num_threads=parallel->get_num_threads();
01257     ASSERT(num_threads>0);
01258     int32_t* vec=new int32_t[num_threads*num_feat];
01259 
01260     if (num_threads < 2)
01261     {
01262 #ifdef WIN32
01263        for (int32_t j=0; j<num_feat; j++)
01264 #else
01265        CSignal::clear_cancel();
01266        for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01267 #endif
01268             {
01269                 init_optimization(num_suppvec, IDX, alphas, j);
01270                 S_THREAD_PARAM<DNATrie> params;
01271                 params.vec=vec;
01272                 params.result=result;
01273                 params.weights=weights;
01274                 params.kernel=this;
01275                 params.tries=&tries;
01276                 params.factor=factor;
01277                 params.j=j;
01278                 params.start=0;
01279                 params.end=num_vec;
01280                 params.length=length;
01281                 params.max_shift=max_shift;
01282                 params.shift=shift;
01283                 params.vec_idx=vec_idx;
01284                 compute_batch_helper((void*) &params);
01285 
01286                 SG_PROGRESS(j,0,num_feat);
01287             }
01288     }
01289 #ifndef WIN32
01290     else
01291     {
01292 
01293         CSignal::clear_cancel();
01294         for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01295         {
01296             init_optimization(num_suppvec, IDX, alphas, j);
01297             pthread_t* threads = new pthread_t[num_threads-1];
01298             S_THREAD_PARAM<DNATrie>* params = new S_THREAD_PARAM<DNATrie>[num_threads];
01299             int32_t step= num_vec/num_threads;
01300             int32_t t;
01301 
01302             for (t=0; t<num_threads-1; t++)
01303             {
01304                 params[t].vec=&vec[num_feat*t];
01305                 params[t].result=result;
01306                 params[t].weights=weights;
01307                 params[t].kernel=this;
01308                 params[t].tries=&tries;
01309                 params[t].factor=factor;
01310                 params[t].j=j;
01311                 params[t].start = t*step;
01312                 params[t].end = (t+1)*step;
01313                 params[t].length=length;
01314                 params[t].max_shift=max_shift;
01315                 params[t].shift=shift;
01316                 params[t].vec_idx=vec_idx;
01317                 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)&params[t]);
01318             }
01319 
01320             params[t].vec=&vec[num_feat*t];
01321             params[t].result=result;
01322             params[t].weights=weights;
01323             params[t].kernel=this;
01324             params[t].tries=&tries;
01325             params[t].factor=factor;
01326             params[t].j=j;
01327             params[t].start=t*step;
01328             params[t].end=num_vec;
01329             params[t].length=length;
01330             params[t].max_shift=max_shift;
01331             params[t].shift=shift;
01332             params[t].vec_idx=vec_idx;
01333             compute_batch_helper((void*) &params[t]);
01334 
01335             for (t=0; t<num_threads-1; t++)
01336                 pthread_join(threads[t], NULL);
01337             SG_PROGRESS(j,0,num_feat);
01338 
01339             delete[] params;
01340             delete[] threads;
01341         }
01342     }
01343 #endif
01344 
01345     delete[] vec;
01346 
01347     //really also free memory as this can be huge on testing especially when
01348     //using the combined kernel
01349     create_empty_tries();
01350 }
01351 
01352 float64_t* CWeightedDegreePositionStringKernel::compute_scoring(
01353     int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result,
01354     int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01355 {
01356     ASSERT(position_weights_lhs==NULL);
01357     ASSERT(position_weights_rhs==NULL);
01358 
01359     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01360     ASSERT(num_feat>0);
01361     ASSERT(alphabet);
01362     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01363 
01364     num_sym=4; //for now works only w/ DNA
01365 
01366     ASSERT(max_degree>0);
01367 
01368     // === variables
01369     int32_t* nofsKmers=new int32_t[max_degree];
01370     float64_t** C=new float64_t*[max_degree];
01371     float64_t** L=new float64_t*[max_degree];
01372     float64_t** R=new float64_t*[max_degree];
01373 
01374     int32_t i;
01375     int32_t k;
01376 
01377     // --- return table
01378     int32_t bigtabSize=0;
01379     for (k=0; k<max_degree; ++k )
01380     {
01381         nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1);
01382         const int32_t tabSize=nofsKmers[k]*num_feat;
01383         bigtabSize+=tabSize;
01384     }
01385     result=new float64_t[bigtabSize];
01386 
01387     // --- auxilliary tables
01388     int32_t tabOffs=0;
01389     for( k = 0; k < max_degree; ++k )
01390     {
01391         const int32_t tabSize = nofsKmers[k] * num_feat;
01392         C[k] = &result[tabOffs];
01393         L[k] = new float64_t[ tabSize ];
01394         R[k] = new float64_t[ tabSize ];
01395         tabOffs+=tabSize;
01396         for(i = 0; i < tabSize; i++ )
01397         {
01398             C[k][i] = 0.0;
01399             L[k][i] = 0.0;
01400             R[k][i] = 0.0;
01401         }
01402     }
01403 
01404     // --- tree parsing info
01405     float64_t* margFactors=new float64_t[degree];
01406 
01407     int32_t* x = new int32_t[ degree+1 ];
01408     int32_t* substrs = new int32_t[ degree+1 ];
01409     // - fill arrays
01410     margFactors[0] = 1.0;
01411     substrs[0] = 0;
01412     for( k=1; k < degree; ++k ) {
01413         margFactors[k] = 0.25 * margFactors[k-1];
01414         substrs[k] = -1;
01415     }
01416     substrs[degree] = -1;
01417     // - fill struct
01418     struct TreeParseInfo info;
01419     info.num_sym = num_sym;
01420     info.num_feat = num_feat;
01421     info.p = -1;
01422     info.k = -1;
01423     info.nofsKmers = nofsKmers;
01424     info.margFactors = margFactors;
01425     info.x = x;
01426     info.substrs = substrs;
01427     info.y0 = 0;
01428     info.C_k = NULL;
01429     info.L_k = NULL;
01430     info.R_k = NULL;
01431 
01432     // === main loop
01433     i = 0; // total progress
01434     for( k = 0; k < max_degree; ++k )
01435     {
01436         const int32_t nofKmers = nofsKmers[ k ];
01437         info.C_k = C[k];
01438         info.L_k = L[k];
01439         info.R_k = R[k];
01440 
01441         // --- run over all trees
01442         for(int32_t p = 0; p < num_feat; ++p )
01443         {
01444             init_optimization( num_suppvec, IDX, alphas, p );
01445             int32_t tree = p ;
01446             for(int32_t j = 0; j < degree+1; j++ ) {
01447                 x[j] = -1;
01448             }
01449             tries.traverse( tree, p, info, 0, x, k );
01450             SG_PROGRESS(i++,0,num_feat*max_degree);
01451         }
01452 
01453         // --- add partial overlap scores
01454         if( k > 0 ) {
01455             const int32_t j = k - 1;
01456             const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 );
01457             for(int32_t p = 0; p < num_feat; ++p ) {
01458                 const int32_t offsetJ = nofJmers * p;
01459                 const int32_t offsetJ1 = nofJmers * (p+1);
01460                 const int32_t offsetK = nofKmers * p;
01461                 int32_t y;
01462                 int32_t sym;
01463                 for( y = 0; y < nofJmers; ++y ) {
01464                     for( sym = 0; sym < num_sym; ++sym ) {
01465                         const int32_t y_sym = num_sym*y + sym;
01466                         const int32_t sym_y = nofJmers*sym + y;
01467                         ASSERT(0<=y_sym && y_sym<nofKmers);
01468                         ASSERT(0<=sym_y && sym_y<nofKmers);
01469                         C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ];
01470                         if( p < num_feat-1 ) {
01471                             C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ];
01472                         }
01473                     }
01474                 }
01475             }
01476         }
01477         //   if( k > 1 )
01478         //     j = k-1
01479         //     for all positions p
01480         //       for all j-mers y
01481         //          for n in {A,C,G,T}
01482         //            C_k[ p, [y,n] ] += L_j[ p, y ]
01483         //            C_k[ p, [n,y] ] += R_j[ p+1, y ]
01484         //          end;
01485         //       end;
01486         //     end;
01487         //   end;
01488     }
01489 
01490     // === return a vector
01491     num_feat=1;
01492     num_sym = bigtabSize;
01493     // --- clean up
01494     delete[] nofsKmers;
01495     delete[] margFactors;
01496     delete[] substrs;
01497     delete[] x;
01498     delete[] C;
01499     for( k = 0; k < max_degree; ++k ) {
01500         delete[] L[k];
01501         delete[] R[k];
01502     }
01503     delete[] L;
01504     delete[] R;
01505     return result;
01506 }
01507 
01508 char* CWeightedDegreePositionStringKernel::compute_consensus(
01509     int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01510 {
01511     ASSERT(position_weights_lhs==NULL);
01512     ASSERT(position_weights_rhs==NULL);
01513     //only works for order <= 32
01514     ASSERT(degree<=32);
01515     ASSERT(!tries.get_use_compact_terminal_nodes());
01516     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01517     ASSERT(num_feat>0);
01518     ASSERT(alphabet);
01519     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01520 
01521     //consensus
01522     char* result=new char[num_feat];
01523 
01524     //backtracking and scoring table
01525     int32_t num_tables=CMath::max(1,num_feat-degree+1);
01526     DynArray<ConsensusEntry>** table=new DynArray<ConsensusEntry>*[num_tables];
01527 
01528     for (int32_t i=0; i<num_tables; i++)
01529         table[i]=new DynArray<ConsensusEntry>(num_suppvec/10);
01530 
01531     //compute consensus via dynamic programming
01532     for (int32_t i=0; i<num_tables; i++)
01533     {
01534         bool cumulative=false;
01535 
01536         if (i<num_tables-1)
01537             init_optimization(num_suppvec, IDX, alphas, i);
01538         else
01539         {
01540             init_optimization(num_suppvec, IDX, alphas, i, num_feat-1);
01541             cumulative=true;
01542         }
01543 
01544         if (i==0)
01545             tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights);
01546         else
01547             tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights);
01548 
01549         SG_PROGRESS(i,0,num_feat);
01550     }
01551 
01552 
01553     //int32_t n=table[0]->get_num_elements();
01554 
01555     //for (int32_t i=0; i<n; i++)
01556     //{
01557     //  ConsensusEntry e= table[0]->get_element(i);
01558     //  SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01559     //}
01560 
01561     //n=table[num_tables-1]->get_num_elements();
01562     //for (int32_t i=0; i<n; i++)
01563     //{
01564     //  ConsensusEntry e= table[num_tables-1]->get_element(i);
01565     //  SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01566     //}
01567     //n=table[num_tables-2]->get_num_elements();
01568     //for (int32_t i=0; i<n; i++)
01569     //{
01570     //  ConsensusEntry e= table[num_tables-2]->get_element(i);
01571     //  SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01572     //}
01573 
01574     const char* acgt="ACGT";
01575 
01576     //backtracking start
01577     int32_t max_idx=-1;
01578     float32_t max_score=0;
01579     int32_t num_elements=table[num_tables-1]->get_num_elements();
01580 
01581     for (int32_t i=0; i<num_elements; i++)
01582     {
01583         float64_t sc=table[num_tables-1]->get_element(i).score;
01584         if (sc>max_score || max_idx==-1)
01585         {
01586             max_idx=i;
01587             max_score=sc;
01588         }
01589     }
01590     uint64_t endstr=table[num_tables-1]->get_element(max_idx).string;
01591 
01592     SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score);
01593 
01594     for (int32_t i=0; i<degree; i++)
01595         result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3];
01596 
01597     if (num_tables>1)
01598     {
01599         for (int32_t i=num_tables-1; i>=0; i--)
01600         {
01601             //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i);
01602             result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3];
01603             max_idx=table[i]->get_element(max_idx).bt;
01604         }
01605     }
01606 
01607     //for (int32_t t=0; t<num_tables; t++)
01608     //{
01609     //  n=table[t]->get_num_elements();
01610     //  for (int32_t i=0; i<n; i++)
01611     //  {
01612     //      ConsensusEntry e= table[t]->get_element(i);
01613     //      SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt);
01614     //  }
01615     //}
01616 
01617     for (int32_t i=0; i<num_tables; i++)
01618         delete table[i];
01619 
01620     delete[] table;
01621     return result;
01622 }
01623 
01624 
01625 float64_t* CWeightedDegreePositionStringKernel::extract_w(
01626     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01627     float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01628 {
01629   delete_optimization();
01630   use_poim_tries=true;
01631   poim_tries.delete_trees(false);
01632 
01633   // === check
01634   ASSERT(position_weights_lhs==NULL);
01635   ASSERT(position_weights_rhs==NULL);
01636   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01637   ASSERT(num_feat>0);
01638   ASSERT(alphabet->get_alphabet()==DNA);
01639   ASSERT(max_degree>0);
01640 
01641   // === general variables
01642   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01643   const int32_t seqLen = num_feat;
01644   float64_t** subs;
01645   int32_t i;
01646   int32_t k;
01647   //int32_t y;
01648 
01649   // === init tables "subs" for substring scores / POIMs
01650   // --- compute table sizes
01651   int32_t* offsets;
01652   int32_t offset;
01653   offsets = new int32_t[ max_degree ];
01654   offset = 0;
01655   for( k = 0; k < max_degree; ++k ) {
01656     offsets[k] = offset;
01657     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01658     const int32_t tabSize = nofsKmers * seqLen;
01659     offset += tabSize;
01660   }
01661   // --- allocate memory
01662   const int32_t bigTabSize = offset;
01663   w_result=new float64_t[bigTabSize];
01664   for (i=0; i<bigTabSize; ++i)
01665     w_result[i]=0;
01666 
01667   // --- set pointers for tables
01668   subs = new float64_t*[ max_degree ];
01669   ASSERT( subs != NULL );
01670   for( k = 0; k < max_degree; ++k ) {
01671     subs[k] = &w_result[ offsets[k] ];
01672   }
01673   delete[] offsets;
01674 
01675   // === init trees; extract "w"
01676   init_optimization( num_suppvec, IDX, alphas, -1);
01677   poim_tries.POIMs_extract_W( subs, max_degree );
01678 
01679   // === clean; return "subs" as vector
01680   delete[] subs;
01681   num_feat = 1;
01682   num_sym = bigTabSize;
01683   use_poim_tries=false;
01684   poim_tries.delete_trees(false);
01685   return w_result;
01686 }
01687 
01688 float64_t* CWeightedDegreePositionStringKernel::compute_POIM(
01689     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01690     float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
01691     float64_t* alphas, float64_t* distrib )
01692 {
01693   delete_optimization();
01694   use_poim_tries=true;
01695   poim_tries.delete_trees(false);
01696 
01697   // === check
01698   ASSERT(position_weights_lhs==NULL);
01699   ASSERT(position_weights_rhs==NULL);
01700   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01701   ASSERT(num_feat>0);
01702   ASSERT(alphabet->get_alphabet()==DNA);
01703   ASSERT(max_degree!=0);
01704   ASSERT(distrib);
01705 
01706   // === general variables
01707   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01708   const int32_t seqLen = num_feat;
01709   float64_t** subs;
01710   int32_t i;
01711   int32_t k;
01712 
01713   // === DEBUGGING mode
01714   //
01715   // Activated if "max_degree" < 0.
01716   // Allows to output selected partial score.
01717   //
01718   // |max_degree| mod 4
01719   //   0: substring
01720   //   1: superstring
01721   //   2: left overlap
01722   //   3: right overlap
01723   //
01724   const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0;
01725   if( debug ) {
01726     max_degree = abs(max_degree) / 4;
01727     switch( debug ) {
01728     case 1: {
01729       printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree );
01730       break;
01731     }
01732     case 2: {
01733       printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree );
01734       break;
01735     }
01736     case 3: {
01737       printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree );
01738       break;
01739     }
01740     case 4: {
01741       printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree );
01742       break;
01743     }
01744     default: {
01745       printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree );
01746       ASSERT(0);
01747       break;
01748     }
01749     }
01750   }
01751 
01752   // --- compute table sizes
01753   int32_t* offsets;
01754   int32_t offset;
01755   offsets = new int32_t[ max_degree ];
01756   offset = 0;
01757   for( k = 0; k < max_degree; ++k ) {
01758     offsets[k] = offset;
01759     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01760     const int32_t tabSize = nofsKmers * seqLen;
01761     offset += tabSize;
01762   }
01763   // --- allocate memory
01764   const int32_t bigTabSize=offset;
01765   poim_result=new float64_t[bigTabSize];
01766   for (i=0; i<bigTabSize; ++i )
01767     poim_result[i]=0;
01768 
01769   // --- set pointers for tables
01770   subs=new float64_t*[max_degree];
01771   for (k=0; k<max_degree; ++k)
01772     subs[k]=&poim_result[offsets[k]];
01773 
01774   delete[] offsets;
01775 
01776   // === init trees; precalc S, L and R
01777   init_optimization( num_suppvec, IDX, alphas, -1);
01778   poim_tries.POIMs_precalc_SLR( distrib );
01779 
01780   // === compute substring scores
01781   if( debug==0 || debug==1 ) {
01782     poim_tries.POIMs_extract_W( subs, max_degree );
01783     for( k = 1; k < max_degree; ++k ) {
01784       const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0;
01785       const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k );
01786       const int32_t nofKmers0 = nofKmers1 * NUM_SYMS;
01787       for( i = 0; i < seqLen; ++i ) {
01788     float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL;
01789     float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL;
01790     float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ];
01791     float64_t* const subs_k0i  = & subs[ k-0 ][ i*nofKmers0 ];
01792     int32_t y0;
01793     for( y0 = 0; y0 < nofKmers0; ++y0 ) {
01794       const int32_t y1l = y0 / NUM_SYMS;
01795       const int32_t y1r = y0 % nofKmers1;
01796       const int32_t y2 = y1r / NUM_SYMS;
01797       subs_k0i[ y0 ] += subs_k1i0[ y1l ];
01798       if( i < seqLen-1 ) {
01799         subs_k0i[ y0 ] += subs_k1i1[ y1r ];
01800         if( k > 1 ) {
01801           subs_k0i[ y0 ] -= subs_k2i1[ y2 ];
01802         }
01803       }
01804     }
01805       }
01806     }
01807   }
01808 
01809   // === compute POIMs
01810   poim_tries.POIMs_add_SLR( subs, max_degree, debug );
01811 
01812   // === clean; return "subs" as vector
01813   delete[] subs;
01814   num_feat = 1;
01815   num_sym = bigTabSize;
01816 
01817   use_poim_tries=false;
01818   poim_tries.delete_trees(false);
01819   
01820   return poim_result;
01821 }
01822 
01823 
01824 void CWeightedDegreePositionStringKernel::prepare_POIM2(
01825     float64_t* distrib, int32_t num_sym, int32_t num_feat)
01826 {
01827     free(m_poim_distrib);
01828     m_poim_distrib=(float64_t*)malloc(num_sym*num_feat*sizeof(float64_t));
01829     ASSERT(m_poim_distrib);
01830 
01831     memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t));
01832     m_poim_num_sym=num_sym;
01833     m_poim_num_feat=num_feat;
01834 }
01835 
01836 void CWeightedDegreePositionStringKernel::compute_POIM2(
01837     int32_t max_degree, CSVM* svm)
01838 {
01839     ASSERT(svm);
01840     int32_t num_suppvec=svm->get_num_support_vectors();
01841     int32_t* sv_idx=new int32_t[num_suppvec];
01842     float64_t* sv_weight=new float64_t[num_suppvec];
01843 
01844     for (int32_t i=0; i<num_suppvec; i++)
01845     {
01846         sv_idx[i]=svm->get_support_vector(i);
01847         sv_weight[i]=svm->get_alpha(i);
01848     }
01849     
01850     if ((max_degree < 1) || (max_degree > 12))
01851     {
01852         //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree);
01853         SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree);
01854         max_degree=1;
01855     }
01856     
01857     int32_t num_feat = m_poim_num_feat;
01858     int32_t num_sym = m_poim_num_sym;
01859     free(m_poim);
01860 
01861     m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL,  num_suppvec, sv_idx, 
01862                           sv_weight, m_poim_distrib);
01863 
01864     ASSERT(num_feat==1);
01865     m_poim_result_len=num_sym;
01866     
01867     delete[] sv_weight;
01868     delete[] sv_idx;
01869 }
01870 
01871 void CWeightedDegreePositionStringKernel::get_POIM2(
01872     float64_t** poim, int32_t* result_len)
01873 {
01874     *poim=(float64_t*) malloc(m_poim_result_len*sizeof(float64_t));
01875     ASSERT(*poim);
01876     memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ;
01877     *result_len=m_poim_result_len ;
01878 }
01879 
01880 void CWeightedDegreePositionStringKernel::cleanup_POIM2()
01881 {
01882     free(m_poim) ;
01883     m_poim=NULL ;
01884     free(m_poim_distrib) ;
01885     m_poim_distrib=NULL ;
01886     m_poim_num_sym=0 ;
01887     m_poim_num_sym=0 ;
01888     m_poim_result_len=0 ;
01889 }
01890 
01891 void CWeightedDegreePositionStringKernel::load_serializable_post(void) throw (ShogunException)
01892 {
01893     CKernel::load_serializable_post();
01894 
01895     tries=CTrie<DNATrie>(degree);
01896     poim_tries=CTrie<POIMTrie>(degree);
01897 
01898     if (weights)
01899         init_block_weights();
01900 }
01901 
01902 void CWeightedDegreePositionStringKernel::init()
01903 {
01904     weights=NULL;
01905     position_weights=NULL;
01906     position_weights_len=0;
01907 
01908     position_weights_lhs=NULL;
01909     position_weights_lhs_len=0;
01910     position_weights_rhs=NULL;
01911     position_weights_rhs_len=0;
01912 
01913     weights_buffer=NULL;
01914     mkl_stepsize=1;
01915     degree=1;
01916     length=0;
01917 
01918     max_shift=0;
01919     max_mismatch=0;
01920     seq_length=0;
01921     shift=NULL;
01922     shift_len=0;
01923 
01924     block_weights=NULL;
01925     block_computation=true;
01926     type=E_EXTERNAL;
01927     which_degree=-1;
01928     tries=CTrie<DNATrie>(1);
01929     poim_tries=CTrie<POIMTrie>(1);
01930 
01931     tree_initialized=false;
01932     use_poim_tries=false;
01933     m_poim_distrib=NULL;
01934 
01935     m_poim=NULL;
01936     m_poim_num_sym=0;
01937     m_poim_num_feat=0;
01938     m_poim_result_len=0;
01939 
01940     alphabet=NULL;
01941 
01942     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
01943 
01944     set_normalizer(new CSqrtDiagKernelNormalizer());
01945 
01946     m_parameters->add_matrix(&weights, &weights_degree, &weights_length,
01947             "weights", "WD Kernel weights.");
01948     m_parameters->add_vector(&position_weights, &position_weights_len,
01949             "position_weights",
01950             "Weights per position.");
01951     m_parameters->add_vector(&position_weights_lhs, &position_weights_lhs_len,
01952             "position_weights_lhs",
01953             "Weights per position left hand side.");
01954     m_parameters->add_vector(&position_weights_rhs, &position_weights_rhs_len,
01955             "position_weights_rhs",
01956             "Weights per position right hand side.");
01957     m_parameters->add_vector(&shift, &shift_len,
01958             "shift",
01959             "Shift Vector.");
01960     m_parameters->add(&mkl_stepsize, "mkl_stepsize", "MKL step size.");
01961     m_parameters->add(&degree, "degree", "Order of WD kernel.");
01962     m_parameters->add(&max_mismatch, "max_mismatch",
01963             "Number of allowed mismatches.");
01964     m_parameters->add(&block_computation, "block_computation",
01965             "If block computation shall be used.");
01966     m_parameters->add((machine_int_t*) &type, "type",
01967             "WeightedDegree kernel type.");
01968     m_parameters->add(&which_degree, "which_degree",
01969             "Unqueal -1 if just a single degree is selected.");
01970     m_parameters->add((CSGObject**) &alphabet, "alphabet",
01971             "Alphabet of Features.");
01972 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation