WeightedDegreePositionStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/lib/common.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/lib/Signal.h>
00015 #include <shogun/lib/Trie.h>
00016 #include <shogun/base/Parallel.h>
00017 
00018 #include <shogun/kernel/WeightedDegreePositionStringKernel.h>
00019 #include <shogun/kernel/SqrtDiagKernelNormalizer.h>
00020 #include <shogun/features/Features.h>
00021 #include <shogun/features/StringFeatures.h>
00022 
00023 #include <shogun/classifier/svm/SVM.h>
00024 
00025 #ifdef HAVE_PTHREAD
00026 #include <pthread.h>
00027 #endif
00028 
00029 using namespace shogun;
00030 
00031 #define TRIES(X) ((use_poim_tries) ? (poim_tries.X) : (tries.X))
00032 
00033 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00034 template <class Trie> struct S_THREAD_PARAM 
00035 {
00036     int32_t* vec;
00037     float64_t* result;
00038     float64_t* weights;
00039     CWeightedDegreePositionStringKernel* kernel;
00040     CTrie<Trie>* tries;
00041     float64_t factor;
00042     int32_t j;
00043     int32_t start;
00044     int32_t end;
00045     int32_t length;
00046     int32_t max_shift;
00047     int32_t* shift;
00048     int32_t* vec_idx;
00049 };
00050 #endif // DOXYGEN_SHOULD_SKIP_THIS
00051 
00052 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00053     void)
00054 : CStringKernel<char>()
00055 {
00056     init();
00057 }
00058 
00059 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00060     int32_t size, int32_t d, int32_t mm, int32_t mkls)
00061 : CStringKernel<char>(size)
00062 {
00063     init();
00064 
00065     mkl_stepsize=mkls;
00066     degree=d;
00067     max_mismatch=mm;
00068 
00069     tries=CTrie<DNATrie>(d);
00070     poim_tries=CTrie<POIMTrie>(d);
00071 
00072     set_wd_weights();
00073     ASSERT(weights);
00074 }
00075 
00076 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00077     int32_t size, float64_t* w, int32_t d, int32_t mm, int32_t* s, int32_t sl,
00078     int32_t mkls)
00079 : CStringKernel<char>(size)
00080 {
00081     init();
00082 
00083     mkl_stepsize=mkls;
00084     degree=d;
00085     max_mismatch=mm;
00086 
00087     tries=CTrie<DNATrie>(d);
00088     poim_tries=CTrie<POIMTrie>(d);
00089 
00090     weights=SG_MALLOC(float64_t, d*(1+max_mismatch));
00091     weights_degree=degree;
00092     weights_length=(1+max_mismatch);
00093 
00094     for (int32_t i=0; i<d*(1+max_mismatch); i++)
00095         weights[i]=w[i];
00096 
00097     set_shifts(SGVector<int32_t>(s, sl));
00098 }
00099 
00100 CWeightedDegreePositionStringKernel::CWeightedDegreePositionStringKernel(
00101     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00102 : CStringKernel<char>()
00103 {
00104     init();
00105 
00106     mkl_stepsize=1;
00107     degree=d;
00108 
00109     tries=CTrie<DNATrie>(d);
00110     poim_tries=CTrie<POIMTrie>(d);
00111 
00112     set_wd_weights();
00113     ASSERT(weights);
00114 
00115     init(l, r);
00116 }
00117 
00118 
00119 CWeightedDegreePositionStringKernel::~CWeightedDegreePositionStringKernel()
00120 {
00121     cleanup();
00122     cleanup_POIM2();
00123 
00124     SG_FREE(shift);
00125     shift=NULL;
00126 
00127     SG_FREE(weights);
00128     weights=NULL;
00129     weights_degree=0;
00130     weights_length=0;
00131 
00132     SG_FREE(block_weights);
00133     block_weights=NULL;
00134 
00135     SG_FREE(position_weights);
00136     position_weights=NULL;
00137 
00138     SG_FREE(position_weights_lhs);
00139     position_weights_lhs=NULL;
00140 
00141     SG_FREE(position_weights_rhs);
00142     position_weights_rhs=NULL;
00143 
00144     SG_FREE(weights_buffer);
00145     weights_buffer=NULL;
00146 }
00147 
00148 void CWeightedDegreePositionStringKernel::remove_lhs()
00149 {
00150     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00151     delete_optimization();
00152 
00153     tries.destroy();
00154     poim_tries.destroy();
00155 
00156     CKernel::remove_lhs();
00157 }
00158 
00159 void CWeightedDegreePositionStringKernel::create_empty_tries()
00160 {
00161     ASSERT(lhs);
00162     seq_length = ((CStringFeatures<char>*) lhs)->get_max_vector_length();
00163 
00164     if (opt_type==SLOWBUTMEMEFFICIENT)
00165     {
00166         tries.create(seq_length, true); 
00167         poim_tries.create(seq_length, true); 
00168     }
00169     else if (opt_type==FASTBUTMEMHUNGRY)
00170     {
00171         tries.create(seq_length, false);  // still buggy
00172         poim_tries.create(seq_length, false);  // still buggy
00173     }
00174     else
00175         SG_ERROR( "unknown optimization type\n");
00176 }
00177 
00178 bool CWeightedDegreePositionStringKernel::init(CFeatures* l, CFeatures* r)
00179 {
00180     int32_t lhs_changed = (lhs!=l) ;
00181     int32_t rhs_changed = (rhs!=r) ;
00182 
00183     CStringKernel<char>::init(l,r);
00184 
00185     SG_DEBUG( "lhs_changed: %i\n", lhs_changed) ;
00186     SG_DEBUG( "rhs_changed: %i\n", rhs_changed) ;
00187 
00188     CStringFeatures<char>* sf_l=(CStringFeatures<char>*) l;
00189     CStringFeatures<char>* sf_r=(CStringFeatures<char>*) r;
00190 
00191     /* set shift */
00192     if (shift_len==0) {
00193         shift_len=sf_l->get_vector_length(0);
00194         int32_t *shifts=SG_MALLOC(int32_t, shift_len);
00195         for (int32_t i=0; i<shift_len; i++) {
00196             shifts[i]=1;
00197         }
00198         set_shifts(SGVector<int32_t>(shifts, shift_len));
00199         SG_FREE(shifts);
00200     }
00201 
00202 
00203     int32_t len=sf_l->get_max_vector_length();
00204     if (lhs_changed && !sf_l->have_same_length(len))
00205         SG_ERROR("All strings in WD kernel must have same length (lhs wrong)!\n");
00206 
00207     if (rhs_changed && !sf_r->have_same_length(len))
00208         SG_ERROR("All strings in WD kernel must have same length (rhs wrong)!\n");
00209 
00210     SG_UNREF(alphabet);
00211     alphabet= sf_l->get_alphabet();
00212     CAlphabet* ralphabet=sf_r->get_alphabet();
00213 
00214     if (!((alphabet->get_alphabet()==DNA) || (alphabet->get_alphabet()==RNA)))
00215         properties &= ((uint64_t) (-1)) ^ (KP_LINADD | KP_BATCHEVALUATION);
00216 
00217     ASSERT(ralphabet->get_alphabet()==alphabet->get_alphabet());
00218     SG_UNREF(ralphabet);
00219 
00220     //whenever init is called also init tries and block weights
00221     create_empty_tries();
00222     init_block_weights();
00223 
00224     return init_normalizer();
00225 }
00226 
00227 void CWeightedDegreePositionStringKernel::cleanup()
00228 {
00229     SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00230     delete_optimization();
00231 
00232     SG_FREE(block_weights);
00233     block_weights=NULL;
00234 
00235     tries.destroy();
00236     poim_tries.destroy();
00237 
00238     seq_length = 0;
00239     tree_initialized = false;
00240 
00241     SG_UNREF(alphabet);
00242     alphabet=NULL;
00243 
00244     CKernel::cleanup();
00245 }
00246 
00247 bool CWeightedDegreePositionStringKernel::init_optimization(
00248     int32_t p_count, int32_t * IDX, float64_t * alphas, int32_t tree_num,
00249     int32_t upto_tree)
00250 {
00251     ASSERT(position_weights_lhs==NULL);
00252     ASSERT(position_weights_rhs==NULL);
00253 
00254     if (upto_tree<0)
00255         upto_tree=tree_num;
00256 
00257     if (max_mismatch!=0)
00258     {
00259         SG_ERROR( "CWeightedDegreePositionStringKernel optimization not implemented for mismatch!=0\n");
00260         return false ;
00261     }
00262 
00263     if (tree_num<0)
00264         SG_DEBUG( "deleting CWeightedDegreePositionStringKernel optimization\n");
00265 
00266     delete_optimization();
00267 
00268     if (tree_num<0)
00269         SG_DEBUG( "initializing CWeightedDegreePositionStringKernel optimization\n") ;
00270 
00271     for (int32_t i=0; i<p_count; i++)
00272     {
00273         if (tree_num<0)
00274         {
00275             if ( (i % (p_count/10+1)) == 0)
00276                 SG_PROGRESS(i,0,p_count);
00277             add_example_to_tree(IDX[i], alphas[i]);
00278         }
00279         else
00280         {
00281             for (int32_t t=tree_num; t<=upto_tree; t++)
00282                 add_example_to_single_tree(IDX[i], alphas[i], t);
00283         }
00284     }
00285 
00286     if (tree_num<0)
00287         SG_DONE();
00288 
00289     set_is_initialized(true) ;
00290     return true ;
00291 }
00292 
00293 bool CWeightedDegreePositionStringKernel::delete_optimization() 
00294 { 
00295     if ((opt_type==FASTBUTMEMHUNGRY) && (tries.get_use_compact_terminal_nodes())) 
00296     {
00297         tries.set_use_compact_terminal_nodes(false) ;
00298         SG_DEBUG( "disabling compact trie nodes with FASTBUTMEMHUNGRY\n") ;
00299     }
00300 
00301     if (get_is_initialized())
00302     {
00303         if (opt_type==SLOWBUTMEMEFFICIENT)
00304             tries.delete_trees(true); 
00305         else if (opt_type==FASTBUTMEMHUNGRY)
00306             tries.delete_trees(false);  // still buggy
00307         else {
00308             SG_ERROR( "unknown optimization type\n");
00309         }
00310         set_is_initialized(false);
00311 
00312         return true;
00313     }
00314 
00315     return false;
00316 }
00317 
00318 float64_t CWeightedDegreePositionStringKernel::compute_with_mismatch(
00319     char* avec, int32_t alen, char* bvec, int32_t blen)
00320 {
00321     float64_t* max_shift_vec= SG_MALLOC(float64_t, max_shift);
00322     float64_t sum0=0 ;
00323     for (int32_t i=0; i<max_shift; i++)
00324         max_shift_vec[i]=0 ;
00325     
00326     // no shift
00327     for (int32_t i=0; i<alen; i++)
00328     {
00329         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00330             continue ;
00331         
00332         int32_t mismatches=0;
00333         float64_t sumi = 0.0 ;
00334         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00335         {
00336             if (avec[i+j]!=bvec[i+j])
00337             {
00338                 mismatches++ ;
00339                 if (mismatches>max_mismatch)
00340                     break ;
00341             } ;
00342             sumi += weights[j+degree*mismatches];
00343         }
00344         if (position_weights!=NULL)
00345             sum0 += position_weights[i]*sumi ;
00346         else
00347             sum0 += sumi ;
00348     } ;
00349     
00350     for (int32_t i=0; i<alen; i++)
00351     {
00352         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00353         {
00354             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00355                 continue ;
00356             
00357             float64_t sumi1 = 0.0 ;
00358             // shift in sequence a
00359             int32_t mismatches=0;
00360             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00361             {
00362                 if (avec[i+j+k]!=bvec[i+j])
00363                 {
00364                     mismatches++ ;
00365                     if (mismatches>max_mismatch)
00366                         break ;
00367                 } ;
00368                 sumi1 += weights[j+degree*mismatches];
00369             }
00370             float64_t sumi2 = 0.0 ;
00371             // shift in sequence b
00372             mismatches=0;
00373             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00374             {
00375                 if (avec[i+j]!=bvec[i+j+k])
00376                 {
00377                     mismatches++ ;
00378                     if (mismatches>max_mismatch)
00379                         break ;
00380                 } ;
00381                 sumi2 += weights[j+degree*mismatches];
00382             }
00383             if (position_weights!=NULL)
00384                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00385             else
00386                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00387         } ;
00388     }
00389     
00390     float64_t result = sum0 ;
00391     for (int32_t i=0; i<max_shift; i++)
00392         result += max_shift_vec[i]/(2*(i+1)) ;
00393     
00394     SG_FREE(max_shift_vec);
00395     return result ;
00396 }
00397 
00398 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch(
00399     char* avec, int32_t alen, char* bvec, int32_t blen) 
00400 {
00401     float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift);
00402     float64_t sum0=0 ;
00403     for (int32_t i=0; i<max_shift; i++)
00404         max_shift_vec[i]=0 ;
00405 
00406     // no shift
00407     for (int32_t i=0; i<alen; i++)
00408     {
00409         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00410             continue ;
00411 
00412         float64_t sumi = 0.0 ;
00413         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00414         {
00415             if (avec[i+j]!=bvec[i+j])
00416                 break ;
00417             sumi += weights[j];
00418         }
00419         if (position_weights!=NULL)
00420             sum0 += position_weights[i]*sumi ;
00421         else
00422             sum0 += sumi ;
00423     } ;
00424 
00425     for (int32_t i=0; i<alen; i++)
00426     {
00427         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00428         {
00429             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00430                 continue ;
00431 
00432             float64_t sumi1 = 0.0 ;
00433             // shift in sequence a
00434             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00435             {
00436                 if (avec[i+j+k]!=bvec[i+j])
00437                     break ;
00438                 sumi1 += weights[j];
00439             }
00440             float64_t sumi2 = 0.0 ;
00441             // shift in sequence b
00442             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00443             {
00444                 if (avec[i+j]!=bvec[i+j+k])
00445                     break ;
00446                 sumi2 += weights[j];
00447             }
00448             if (position_weights!=NULL)
00449                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00450             else
00451                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00452         } ;
00453     }
00454 
00455     float64_t result = sum0 ;
00456     for (int32_t i=0; i<max_shift; i++)
00457         result += max_shift_vec[i]/(2*(i+1)) ;
00458 
00459     SG_FREE(max_shift_vec);
00460 
00461     return result ;
00462 }
00463 
00464 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_matrix(
00465     char* avec, int32_t alen, char* bvec, int32_t blen) 
00466 {
00467     float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift);
00468     float64_t sum0=0 ;
00469     for (int32_t i=0; i<max_shift; i++)
00470         max_shift_vec[i]=0 ;
00471 
00472     // no shift
00473     for (int32_t i=0; i<alen; i++)
00474     {
00475         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00476             continue ;
00477         float64_t sumi = 0.0 ;
00478         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00479         {
00480             if (avec[i+j]!=bvec[i+j])
00481                 break ;
00482             sumi += weights[i*degree+j];
00483         }
00484         if (position_weights!=NULL)
00485             sum0 += position_weights[i]*sumi ;
00486         else
00487             sum0 += sumi ;
00488     } ;
00489 
00490     for (int32_t i=0; i<alen; i++)
00491     {
00492         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00493         {
00494             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00495                 continue ;
00496 
00497             float64_t sumi1 = 0.0 ;
00498             // shift in sequence a
00499             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00500             {
00501                 if (avec[i+j+k]!=bvec[i+j])
00502                     break ;
00503                 sumi1 += weights[i*degree+j];
00504             }
00505             float64_t sumi2 = 0.0 ;
00506             // shift in sequence b
00507             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00508             {
00509                 if (avec[i+j]!=bvec[i+j+k])
00510                     break ;
00511                 sumi2 += weights[i*degree+j];
00512             }
00513             if (position_weights!=NULL)
00514                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00515             else
00516                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00517         } ;
00518     }
00519 
00520     float64_t result = sum0 ;
00521     for (int32_t i=0; i<max_shift; i++)
00522         result += max_shift_vec[i]/(2*(i+1)) ;
00523 
00524     SG_FREE(max_shift_vec);
00525     return result ;
00526 }
00527 
00528 float64_t CWeightedDegreePositionStringKernel::compute_without_mismatch_position_weights(
00529     char* avec, float64_t* pos_weights_lhs, int32_t alen, char* bvec,
00530     float64_t* pos_weights_rhs, int32_t blen)
00531 {
00532     float64_t* max_shift_vec = SG_MALLOC(float64_t, max_shift);
00533     float64_t sum0=0 ;
00534     for (int32_t i=0; i<max_shift; i++)
00535         max_shift_vec[i]=0 ;
00536 
00537     // no shift
00538     for (int32_t i=0; i<alen; i++)
00539     {
00540         if ((position_weights!=NULL) && (position_weights[i]==0.0))
00541             continue ;
00542 
00543         float64_t sumi = 0.0 ;
00544         float64_t posweight_lhs = 0.0 ;
00545         float64_t posweight_rhs = 0.0 ;
00546         for (int32_t j=0; (j<degree) && (i+j<alen); j++)
00547         {
00548             posweight_lhs += pos_weights_lhs[i+j] ;
00549             posweight_rhs += pos_weights_rhs[i+j] ;
00550             
00551             if (avec[i+j]!=bvec[i+j])
00552                 break ;
00553             sumi += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00554         }
00555         if (position_weights!=NULL)
00556             sum0 += position_weights[i]*sumi ;
00557         else
00558             sum0 += sumi ;
00559     } ;
00560 
00561     for (int32_t i=0; i<alen; i++)
00562     {
00563         for (int32_t k=1; (k<=shift[i]) && (i+k<alen); k++)
00564         {
00565             if ((position_weights!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
00566                 continue ;
00567 
00568             // shift in sequence a  
00569             float64_t sumi1 = 0.0 ;
00570             float64_t posweight_lhs = 0.0 ;
00571             float64_t posweight_rhs = 0.0 ;
00572             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00573             {
00574                 posweight_lhs += pos_weights_lhs[i+j+k] ;
00575                 posweight_rhs += pos_weights_rhs[i+j] ;
00576                 if (avec[i+j+k]!=bvec[i+j])
00577                     break ;
00578                 sumi1 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00579             }
00580             // shift in sequence b
00581             float64_t sumi2 = 0.0 ;
00582             posweight_lhs = 0.0 ;
00583             posweight_rhs = 0.0 ;
00584             for (int32_t j=0; (j<degree) && (i+j+k<alen); j++)
00585             {
00586                 posweight_lhs += pos_weights_lhs[i+j] ;
00587                 posweight_rhs += pos_weights_rhs[i+j+k] ;
00588                 if (avec[i+j]!=bvec[i+j+k])
00589                     break ;
00590                 sumi2 += weights[j]*(posweight_lhs/(j+1))*(posweight_rhs/(j+1)) ;
00591             }
00592             if (position_weights!=NULL)
00593                 max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
00594             else
00595                 max_shift_vec[k-1] += sumi1 + sumi2 ;
00596         } ;
00597     }
00598 
00599     float64_t result = sum0 ;
00600     for (int32_t i=0; i<max_shift; i++)
00601         result += max_shift_vec[i]/(2*(i+1)) ;
00602 
00603     SG_FREE(max_shift_vec);
00604     return result ;
00605 }
00606 
00607 
00608 float64_t CWeightedDegreePositionStringKernel::compute(
00609     int32_t idx_a, int32_t idx_b)
00610 {
00611     int32_t alen, blen;
00612     bool free_avec, free_bvec;
00613 
00614     char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00615     char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00616     // can only deal with strings of same length
00617     ASSERT(alen==blen);
00618     ASSERT(shift_len==alen);
00619 
00620     float64_t result = 0 ;
00621     if (position_weights_lhs!=NULL || position_weights_rhs!=NULL)
00622     {
00623         ASSERT(max_mismatch==0);
00624         float64_t* position_weights_rhs_ = position_weights_rhs ;
00625         if (lhs==rhs)
00626             position_weights_rhs_ = position_weights_lhs ;
00627         
00628         result = compute_without_mismatch_position_weights(avec, &position_weights_lhs[idx_a*alen], alen, bvec, &position_weights_rhs_[idx_b*blen], blen) ;
00629     }
00630     else if (max_mismatch > 0)
00631         result = compute_with_mismatch(avec, alen, bvec, blen) ;
00632     else if (length==0)
00633         result = compute_without_mismatch(avec, alen, bvec, blen) ;
00634     else
00635         result = compute_without_mismatch_matrix(avec, alen, bvec, blen) ;
00636 
00637     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00638     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00639 
00640     return result ;
00641 }
00642 
00643 
00644 void CWeightedDegreePositionStringKernel::add_example_to_tree(
00645     int32_t idx, float64_t alpha)
00646 {
00647     ASSERT(position_weights_lhs==NULL);
00648     ASSERT(position_weights_rhs==NULL);
00649     ASSERT(alphabet);
00650     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00651 
00652     int32_t len=0;
00653     bool free_vec;
00654     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00655     ASSERT(max_mismatch==0);
00656     int32_t *vec = SG_MALLOC(int32_t, len);
00657 
00658     for (int32_t i=0; i<len; i++)
00659         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00660     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00661 
00662     if (opt_type==FASTBUTMEMHUNGRY)
00663     {
00664         //TRIES(set_use_compact_terminal_nodes(false)) ;
00665         ASSERT(!TRIES(get_use_compact_terminal_nodes()));
00666     }
00667 
00668     for (int32_t i=0; i<len; i++)
00669     {
00670         int32_t max_s=-1;
00671 
00672         if (opt_type==SLOWBUTMEMEFFICIENT)
00673             max_s=0;
00674         else if (opt_type==FASTBUTMEMHUNGRY)
00675             max_s=shift[i];
00676         else {
00677             SG_ERROR( "unknown optimization type\n");
00678         }
00679 
00680         for (int32_t s=max_s; s>=0; s--)
00681         {
00682             float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00683             TRIES(add_to_trie(i, s, vec, alpha_pw, weights, (length!=0))) ;
00684             if ((s==0) || (i+s>=len))
00685                 continue;
00686 
00687             TRIES(add_to_trie(i+s, -s, vec, alpha_pw, weights, (length!=0))) ;
00688         }
00689     }
00690 
00691     SG_FREE(vec);
00692     tree_initialized=true ;
00693 }
00694 
00695 void CWeightedDegreePositionStringKernel::add_example_to_single_tree(
00696     int32_t idx, float64_t alpha, int32_t tree_num) 
00697 {
00698     ASSERT(position_weights_lhs==NULL);
00699     ASSERT(position_weights_rhs==NULL);
00700     ASSERT(alphabet);
00701     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00702 
00703     int32_t len=0;
00704     bool free_vec;
00705     char* char_vec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx, len, free_vec);
00706     ASSERT(max_mismatch==0);
00707     int32_t *vec=SG_MALLOC(int32_t, len);
00708     int32_t max_s=-1;
00709 
00710     if (opt_type==SLOWBUTMEMEFFICIENT)
00711         max_s=0;
00712     else if (opt_type==FASTBUTMEMHUNGRY)
00713     {
00714         ASSERT(!tries.get_use_compact_terminal_nodes());
00715         max_s=shift[tree_num];
00716     }
00717     else {
00718         SG_ERROR( "unknown optimization type\n");
00719     }
00720     for (int32_t i=CMath::max(0,tree_num-max_shift);
00721             i<CMath::min(len,tree_num+degree+max_shift); i++)
00722     {
00723         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00724     } 
00725     ((CStringFeatures<char>*) lhs)->free_feature_vector(char_vec, idx, free_vec);
00726 
00727     for (int32_t s=max_s; s>=0; s--)
00728     {
00729         float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00730         tries.add_to_trie(tree_num, s, vec, alpha_pw, weights, (length!=0)) ;
00731     } 
00732 
00733     if (opt_type==FASTBUTMEMHUNGRY)
00734     {
00735         for (int32_t i=CMath::max(0,tree_num-max_shift); i<CMath::min(len,tree_num+max_shift+1); i++)
00736         {
00737             int32_t s=tree_num-i;
00738             if ((i+s<len) && (s>=1) && (s<=shift[i]))
00739             {
00740                 float64_t alpha_pw = normalizer->normalize_lhs((s==0) ? (alpha) : (alpha/(2.0*s)), idx);
00741                 tries.add_to_trie(tree_num, -s, vec, alpha_pw, weights, (length!=0)) ; 
00742             }
00743         }
00744     }
00745     SG_FREE(vec);
00746     tree_initialized=true ;
00747 }
00748 
00749 float64_t CWeightedDegreePositionStringKernel::compute_by_tree(int32_t idx)
00750 {
00751     ASSERT(position_weights_lhs==NULL);
00752     ASSERT(position_weights_rhs==NULL);
00753     ASSERT(alphabet);
00754     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00755 
00756     float64_t sum=0;
00757     int32_t len=0;
00758     bool free_vec;
00759     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00760     ASSERT(max_mismatch==0);
00761     int32_t *vec=SG_MALLOC(int32_t, len);
00762 
00763     for (int32_t i=0; i<len; i++)
00764         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00765 
00766     ((CStringFeatures<char>*) rhs)->free_feature_vector(char_vec, idx, free_vec);
00767 
00768     for (int32_t i=0; i<len; i++)
00769         sum += tries.compute_by_tree_helper(vec, len, i, i, i, weights, (length!=0)) ;
00770 
00771     if (opt_type==SLOWBUTMEMEFFICIENT)
00772     {
00773         for (int32_t i=0; i<len; i++)
00774         {
00775             for (int32_t s=1; (s<=shift[i]) && (i+s<len); s++)
00776             {
00777                 sum+=tries.compute_by_tree_helper(vec, len, i, i+s, i, weights, (length!=0))/(2*s) ;
00778                 sum+=tries.compute_by_tree_helper(vec, len, i+s, i, i+s, weights, (length!=0))/(2*s) ;
00779             }
00780         }
00781     }
00782 
00783     SG_FREE(vec);
00784 
00785     return normalizer->normalize_rhs(sum, idx);
00786 }
00787 
00788 void CWeightedDegreePositionStringKernel::compute_by_tree(
00789     int32_t idx, float64_t* LevelContrib)
00790 {
00791     ASSERT(position_weights_lhs==NULL);
00792     ASSERT(position_weights_rhs==NULL);
00793     ASSERT(alphabet);
00794     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
00795 
00796     int32_t len=0;
00797     bool free_vec;
00798     char* char_vec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx, len, free_vec);
00799     ASSERT(max_mismatch==0);
00800     int32_t *vec=SG_MALLOC(int32_t, len);
00801 
00802     for (int32_t i=0; i<len; i++)
00803         vec[i]=alphabet->remap_to_bin(char_vec[i]);
00804 
00805     ((CStringFeatures<char>*) rhs)->free_feature_vector(char_vec, idx, free_vec);
00806 
00807     for (int32_t i=0; i<len; i++)
00808     {
00809         tries.compute_by_tree_helper(vec, len, i, i, i, LevelContrib,
00810                 normalizer->normalize_rhs(1.0, idx), mkl_stepsize, weights,
00811                 (length!=0));
00812     }
00813 
00814     if (opt_type==SLOWBUTMEMEFFICIENT)
00815     {
00816         for (int32_t i=0; i<len; i++)
00817             for (int32_t k=1; (k<=shift[i]) && (i+k<len); k++)
00818             {
00819                 tries.compute_by_tree_helper(vec, len, i, i+k, i, LevelContrib,
00820                         normalizer->normalize_rhs(1.0/(2*k), idx), mkl_stepsize,
00821                         weights, (length!=0)) ;
00822                 tries.compute_by_tree_helper(vec, len, i+k, i, i+k,
00823                         LevelContrib, normalizer->normalize_rhs(1.0/(2*k), idx),
00824                         mkl_stepsize, weights, (length!=0)) ;
00825             }
00826     }
00827 
00828     SG_FREE(vec);
00829 }
00830 
00831 float64_t* CWeightedDegreePositionStringKernel::compute_abs_weights(
00832     int32_t &len)
00833 {
00834     return tries.compute_abs_weights(len);
00835 }
00836 
00837 void CWeightedDegreePositionStringKernel::set_shifts(SGVector<int32_t> shifts)
00838 {
00839     SG_FREE(shift);
00840 
00841     shift_len = shifts.vlen;
00842     shift = SG_MALLOC(int32_t, shift_len);
00843 
00844     if (shift)
00845     {
00846         max_shift = 0 ;
00847 
00848         for (int32_t i=0; i<shift_len; i++)
00849         {
00850             shift[i] = shifts.vector[i] ;
00851             max_shift = CMath::max(shift[i], max_shift);
00852         }
00853 
00854         ASSERT(max_shift>=0 && max_shift<=shift_len);
00855     }
00856 }
00857 
00858 bool CWeightedDegreePositionStringKernel::set_wd_weights()
00859 {
00860     ASSERT(degree>0);
00861 
00862     SG_FREE(weights);
00863     weights=SG_MALLOC(float64_t, degree);
00864     weights_degree=degree;
00865     weights_length=1;
00866 
00867     if (weights)
00868     {
00869         int32_t i;
00870         float64_t sum=0;
00871         for (i=0; i<degree; i++)
00872         {
00873             weights[i]=degree-i;
00874             sum+=weights[i];
00875         }
00876         for (i=0; i<degree; i++)
00877             weights[i]/=sum;
00878 
00879         for (i=0; i<degree; i++)
00880         {
00881             for (int32_t j=1; j<=max_mismatch; j++)
00882             {
00883                 if (j<i+1)
00884                 {
00885                     int32_t nk=CMath::nchoosek(i+1, j);
00886                     weights[i+j*degree]=weights[i]/(nk*CMath::pow(3.0,j));
00887                 }
00888                 else
00889                     weights[i+j*degree]= 0;
00890             }
00891         }
00892 
00893         return true;
00894     }
00895     else
00896         return false;
00897 }
00898 
00899 bool CWeightedDegreePositionStringKernel::set_weights(SGMatrix<float64_t> new_weights)
00900 {
00901     float64_t* ws=new_weights.matrix;
00902     int32_t d=new_weights.num_rows;
00903     int32_t len=new_weights.num_cols;
00904 
00905     if (d!=degree || len<0)
00906         SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
00907 
00908     degree=d;
00909     length=len;
00910 
00911     if (len <= 0)
00912         len=1;
00913 
00914     weights_degree=degree;
00915     weights_length=len+max_mismatch;
00916 
00917     SG_DEBUG("Creating weights of size %dx%d\n", weights_degree, weights_length);
00918     int32_t num_weights=weights_degree*weights_length;
00919     SG_FREE(weights);
00920     weights=SG_MALLOC(float64_t, num_weights);
00921 
00922     for (int32_t i=0; i<degree*len; i++)
00923         weights[i]=ws[i];
00924 
00925     return true;
00926 }
00927 
00928 void CWeightedDegreePositionStringKernel::set_position_weights(SGVector<float64_t> pws)
00929 {
00930     if (seq_length==0)
00931         seq_length=pws.vlen;
00932 
00933     if (seq_length!=pws.vlen)
00934         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, pws.vlen);
00935 
00936     SG_FREE(position_weights);
00937     position_weights=SG_MALLOC(float64_t, pws.vlen);
00938     position_weights_len=pws.vlen;
00939     tries.set_position_weights(position_weights);
00940 
00941     for (int32_t i=0; i<pws.vlen; i++)
00942         position_weights[i]=pws.vector[i];
00943 }
00944 
00945 bool CWeightedDegreePositionStringKernel::set_position_weights_lhs(float64_t* pws, int32_t len, int32_t num)
00946 {
00947     if (position_weights_rhs==position_weights_lhs)
00948         position_weights_rhs=NULL;
00949     else
00950         delete_position_weights_rhs();
00951 
00952     if (len==0)
00953     {
00954         return delete_position_weights_lhs();
00955     }
00956     
00957     if (seq_length!=len)
00958     {
00959         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00960         return false;
00961     }
00962     
00963     SG_FREE(position_weights_lhs);
00964     position_weights_lhs=SG_MALLOC(float64_t, len*num);
00965     position_weights_lhs_len=len*num;
00966 
00967     for (int32_t i=0; i<len*num; i++)
00968         position_weights_lhs[i]=pws[i];
00969 
00970     return true;
00971 }
00972 
00973 bool CWeightedDegreePositionStringKernel::set_position_weights_rhs(
00974     float64_t* pws, int32_t len, int32_t num)
00975 {
00976     if (len==0)
00977     {
00978         if (position_weights_rhs==position_weights_lhs)
00979         {
00980             position_weights_rhs=NULL;
00981             return true;
00982         }
00983         return delete_position_weights_rhs();
00984     }
00985 
00986     if (seq_length!=len)
00987     {
00988         SG_ERROR("seq_length = %i, position_weights_length=%i\n", seq_length, len);
00989         return false;
00990     }
00991     
00992     SG_FREE(position_weights_rhs);
00993     position_weights_rhs=SG_MALLOC(float64_t, len*num);
00994     position_weights_rhs_len=len*num;
00995 
00996     for (int32_t i=0; i<len*num; i++)
00997         position_weights_rhs[i]=pws[i];
00998 
00999     return true;
01000 }
01001 
01002 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd()
01003 {
01004     SG_FREE(block_weights);
01005     block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree));
01006 
01007     if (block_weights)
01008     {
01009         int32_t k;
01010         float64_t d=degree; // use float to evade rounding errors below
01011 
01012         for (k=0; k<degree; k++)
01013             block_weights[k]=
01014                 (-CMath::pow(k, 3)+(3*d-3)*CMath::pow(k, 2)+(9*d-2)*k+6*d)/(3*d*(d+1));
01015         for (k=degree; k<seq_length; k++)
01016             block_weights[k]=(-d+3*k+4)/3;
01017     }
01018 
01019     return (block_weights!=NULL);
01020 }
01021 
01022 bool CWeightedDegreePositionStringKernel::init_block_weights_from_wd_external()
01023 {
01024     ASSERT(weights);
01025     SG_FREE(block_weights);
01026     block_weights=SG_MALLOC(float64_t, CMath::max(seq_length,degree));
01027 
01028     if (block_weights)
01029     {
01030         int32_t i=0;
01031         block_weights[0]=weights[0];
01032         for (i=1; i<CMath::max(seq_length,degree); i++)
01033             block_weights[i]=0;
01034 
01035         for (i=1; i<CMath::max(seq_length,degree); i++)
01036         {
01037             block_weights[i]=block_weights[i-1];
01038 
01039             float64_t contrib=0;
01040             for (int32_t j=0; j<CMath::min(degree,i+1); j++)
01041                 contrib+=weights[j];
01042 
01043             block_weights[i]+=contrib;
01044         }
01045     }
01046 
01047     return (block_weights!=NULL);
01048 }
01049 
01050 bool CWeightedDegreePositionStringKernel::init_block_weights_const()
01051 {
01052     SG_FREE(block_weights);
01053     block_weights=SG_MALLOC(float64_t, seq_length);
01054 
01055     if (block_weights)
01056     {
01057         for (int32_t i=1; i<seq_length+1 ; i++)
01058             block_weights[i-1]=1.0/seq_length;
01059     }
01060 
01061     return (block_weights!=NULL);
01062 }
01063 
01064 bool CWeightedDegreePositionStringKernel::init_block_weights_linear()
01065 {
01066     SG_FREE(block_weights);
01067     block_weights=SG_MALLOC(float64_t, seq_length);
01068 
01069     if (block_weights)
01070     {
01071         for (int32_t i=1; i<seq_length+1 ; i++)
01072             block_weights[i-1]=degree*i;
01073     }
01074 
01075     return (block_weights!=NULL);
01076 }
01077 
01078 bool CWeightedDegreePositionStringKernel::init_block_weights_sqpoly()
01079 {
01080     SG_FREE(block_weights);
01081     block_weights=SG_MALLOC(float64_t, seq_length);
01082 
01083     if (block_weights)
01084     {
01085         for (int32_t i=1; i<degree+1 ; i++)
01086             block_weights[i-1]=((float64_t) i)*i;
01087 
01088         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01089             block_weights[i-1]=i;
01090     }
01091 
01092     return (block_weights!=NULL);
01093 }
01094 
01095 bool CWeightedDegreePositionStringKernel::init_block_weights_cubicpoly()
01096 {
01097     SG_FREE(block_weights);
01098     block_weights=SG_MALLOC(float64_t, seq_length);
01099 
01100     if (block_weights)
01101     {
01102         for (int32_t i=1; i<degree+1 ; i++)
01103             block_weights[i-1]=((float64_t) i)*i*i;
01104 
01105         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01106             block_weights[i-1]=i;
01107     }
01108 
01109     return (block_weights!=NULL);
01110 }
01111 
01112 bool CWeightedDegreePositionStringKernel::init_block_weights_exp()
01113 {
01114     SG_FREE(block_weights);
01115     block_weights=SG_MALLOC(float64_t, seq_length);
01116 
01117     if (block_weights)
01118     {
01119         for (int32_t i=1; i<degree+1 ; i++)
01120             block_weights[i-1]=exp(((float64_t) i/10.0));
01121 
01122         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01123             block_weights[i-1]=i;
01124     }
01125 
01126     return (block_weights!=NULL);
01127 }
01128 
01129 bool CWeightedDegreePositionStringKernel::init_block_weights_log()
01130 {
01131     SG_FREE(block_weights);
01132     block_weights=SG_MALLOC(float64_t, seq_length);
01133 
01134     if (block_weights)
01135     {
01136         for (int32_t i=1; i<degree+1 ; i++)
01137             block_weights[i-1]=CMath::pow(CMath::log((float64_t) i),2);
01138 
01139         for (int32_t i=degree+1; i<seq_length+1 ; i++)
01140             block_weights[i-1]=i-degree+1+CMath::pow(CMath::log(degree+1.0),2);
01141     }
01142 
01143     return (block_weights!=NULL);
01144 }
01145 
01146 bool CWeightedDegreePositionStringKernel::init_block_weights()
01147 {
01148     switch (type)
01149     {
01150         case E_WD:
01151             return init_block_weights_from_wd();
01152         case E_EXTERNAL:
01153             return init_block_weights_from_wd_external();
01154         case E_BLOCK_CONST:
01155             return init_block_weights_const();
01156         case E_BLOCK_LINEAR:
01157             return init_block_weights_linear();
01158         case E_BLOCK_SQPOLY:
01159             return init_block_weights_sqpoly();
01160         case E_BLOCK_CUBICPOLY:
01161             return init_block_weights_cubicpoly();
01162         case E_BLOCK_EXP:
01163             return init_block_weights_exp();
01164         case E_BLOCK_LOG:
01165             return init_block_weights_log();
01166     };
01167     return false;
01168 }
01169 
01170 
01171 
01172 void* CWeightedDegreePositionStringKernel::compute_batch_helper(void* p)
01173 {
01174     S_THREAD_PARAM<DNATrie>* params = (S_THREAD_PARAM<DNATrie>*) p;
01175     int32_t j=params->j;
01176     CWeightedDegreePositionStringKernel* wd=params->kernel;
01177     CTrie<DNATrie>* tries=params->tries;
01178     float64_t* weights=params->weights;
01179     int32_t length=params->length;
01180     int32_t max_shift=params->max_shift;
01181     int32_t* vec=params->vec;
01182     float64_t* result=params->result;
01183     float64_t factor=params->factor;
01184     int32_t* shift=params->shift;
01185     int32_t* vec_idx=params->vec_idx;
01186 
01187     for (int32_t i=params->start; i<params->end; i++)
01188     {
01189         int32_t len=0;
01190         CStringFeatures<char>* rhs_feat=((CStringFeatures<char>*) wd->get_rhs());
01191         CAlphabet* alpha=wd->alphabet;
01192 
01193         bool free_vec;
01194         char* char_vec=rhs_feat->get_feature_vector(vec_idx[i], len, free_vec);
01195         for (int32_t k=CMath::max(0,j-max_shift); k<CMath::min(len,j+wd->get_degree()+max_shift); k++)
01196             vec[k]=alpha->remap_to_bin(char_vec[k]);
01197         rhs_feat->free_feature_vector(char_vec, vec_idx[i], free_vec);
01198 
01199         SG_UNREF(rhs_feat);
01200 
01201         result[i] += factor*wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec, len, j, j, j, weights, (length!=0)), vec_idx[i]);
01202 
01203         if (wd->get_optimization_type()==SLOWBUTMEMEFFICIENT)
01204         {
01205             for (int32_t q=CMath::max(0,j-max_shift); q<CMath::min(len,j+max_shift+1); q++)
01206             {
01207                 int32_t s=j-q ;
01208                 if ((s>=1) && (s<=shift[q]) && (q+s<len))
01209                 {
01210                     result[i] +=
01211                         wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01212                                 len, q, q+s, q, weights, (length!=0)),
01213                                 vec_idx[i])/(2.0*s);
01214                 }
01215             }
01216 
01217             for (int32_t s=1; (s<=shift[j]) && (j+s<len); s++)
01218             {
01219                 result[i] +=
01220                     wd->normalizer->normalize_rhs(tries->compute_by_tree_helper(vec,
01221                                 len, j+s, j, j+s, weights, (length!=0)),
01222                                 vec_idx[i])/(2.0*s);
01223             }
01224         }
01225     }
01226 
01227     return NULL;
01228 }
01229 
01230 void CWeightedDegreePositionStringKernel::compute_batch(
01231     int32_t num_vec, int32_t* vec_idx, float64_t* result, int32_t num_suppvec,
01232     int32_t* IDX, float64_t* alphas, float64_t factor)
01233 {
01234     ASSERT(alphabet);
01235     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01236     ASSERT(position_weights_lhs==NULL);
01237     ASSERT(position_weights_rhs==NULL);
01238     ASSERT(rhs);
01239     ASSERT(num_vec<=rhs->get_num_vectors());
01240     ASSERT(num_vec>0);
01241     ASSERT(vec_idx);
01242     ASSERT(result);
01243     create_empty_tries();
01244 
01245     int32_t num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01246     ASSERT(num_feat>0);
01247     int32_t num_threads=parallel->get_num_threads();
01248     ASSERT(num_threads>0);
01249     int32_t* vec=SG_MALLOC(int32_t, num_threads*num_feat);
01250 
01251     if (num_threads < 2)
01252     {
01253 #ifdef WIN32
01254        for (int32_t j=0; j<num_feat; j++)
01255 #else
01256        CSignal::clear_cancel();
01257        for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01258 #endif
01259             {
01260                 init_optimization(num_suppvec, IDX, alphas, j);
01261                 S_THREAD_PARAM<DNATrie> params;
01262                 params.vec=vec;
01263                 params.result=result;
01264                 params.weights=weights;
01265                 params.kernel=this;
01266                 params.tries=&tries;
01267                 params.factor=factor;
01268                 params.j=j;
01269                 params.start=0;
01270                 params.end=num_vec;
01271                 params.length=length;
01272                 params.max_shift=max_shift;
01273                 params.shift=shift;
01274                 params.vec_idx=vec_idx;
01275                 compute_batch_helper((void*) &params);
01276 
01277                 SG_PROGRESS(j,0,num_feat);
01278             }
01279     }
01280 #ifdef HAVE_PTHREAD
01281     else
01282     {
01283 
01284         CSignal::clear_cancel();
01285         for (int32_t j=0; j<num_feat && !CSignal::cancel_computations(); j++)
01286         {
01287             init_optimization(num_suppvec, IDX, alphas, j);
01288             pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
01289             S_THREAD_PARAM<DNATrie>* params = SG_MALLOC(S_THREAD_PARAM<DNATrie>, num_threads);
01290             int32_t step= num_vec/num_threads;
01291             int32_t t;
01292 
01293             for (t=0; t<num_threads-1; t++)
01294             {
01295                 params[t].vec=&vec[num_feat*t];
01296                 params[t].result=result;
01297                 params[t].weights=weights;
01298                 params[t].kernel=this;
01299                 params[t].tries=&tries;
01300                 params[t].factor=factor;
01301                 params[t].j=j;
01302                 params[t].start = t*step;
01303                 params[t].end = (t+1)*step;
01304                 params[t].length=length;
01305                 params[t].max_shift=max_shift;
01306                 params[t].shift=shift;
01307                 params[t].vec_idx=vec_idx;
01308                 pthread_create(&threads[t], NULL, CWeightedDegreePositionStringKernel::compute_batch_helper, (void*)&params[t]);
01309             }
01310 
01311             params[t].vec=&vec[num_feat*t];
01312             params[t].result=result;
01313             params[t].weights=weights;
01314             params[t].kernel=this;
01315             params[t].tries=&tries;
01316             params[t].factor=factor;
01317             params[t].j=j;
01318             params[t].start=t*step;
01319             params[t].end=num_vec;
01320             params[t].length=length;
01321             params[t].max_shift=max_shift;
01322             params[t].shift=shift;
01323             params[t].vec_idx=vec_idx;
01324             compute_batch_helper((void*) &params[t]);
01325 
01326             for (t=0; t<num_threads-1; t++)
01327                 pthread_join(threads[t], NULL);
01328             SG_PROGRESS(j,0,num_feat);
01329 
01330             SG_FREE(params);
01331             SG_FREE(threads);
01332         }
01333     }
01334 #endif
01335 
01336     SG_FREE(vec);
01337 
01338     //really also free memory as this can be huge on testing especially when
01339     //using the combined kernel
01340     create_empty_tries();
01341 }
01342 
01343 float64_t* CWeightedDegreePositionStringKernel::compute_scoring(
01344     int32_t max_degree, int32_t& num_feat, int32_t& num_sym, float64_t* result,
01345     int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01346 {
01347     ASSERT(position_weights_lhs==NULL);
01348     ASSERT(position_weights_rhs==NULL);
01349 
01350     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01351     ASSERT(num_feat>0);
01352     ASSERT(alphabet);
01353     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01354 
01355     num_sym=4; //for now works only w/ DNA
01356 
01357     ASSERT(max_degree>0);
01358 
01359     // === variables
01360     int32_t* nofsKmers=SG_MALLOC(int32_t, max_degree);
01361     float64_t** C=SG_MALLOC(float64_t*, max_degree);
01362     float64_t** L=SG_MALLOC(float64_t*, max_degree);
01363     float64_t** R=SG_MALLOC(float64_t*, max_degree);
01364 
01365     int32_t i;
01366     int32_t k;
01367 
01368     // --- return table
01369     int32_t bigtabSize=0;
01370     for (k=0; k<max_degree; ++k )
01371     {
01372         nofsKmers[k]=(int32_t) CMath::pow(num_sym, k+1);
01373         const int32_t tabSize=nofsKmers[k]*num_feat;
01374         bigtabSize+=tabSize;
01375     }
01376     result=SG_MALLOC(float64_t, bigtabSize);
01377 
01378     // --- auxilliary tables
01379     int32_t tabOffs=0;
01380     for( k = 0; k < max_degree; ++k )
01381     {
01382         const int32_t tabSize = nofsKmers[k] * num_feat;
01383         C[k] = &result[tabOffs];
01384         L[k] = SG_MALLOC(float64_t,  tabSize );
01385         R[k] = SG_MALLOC(float64_t,  tabSize );
01386         tabOffs+=tabSize;
01387         for(i = 0; i < tabSize; i++ )
01388         {
01389             C[k][i] = 0.0;
01390             L[k][i] = 0.0;
01391             R[k][i] = 0.0;
01392         }
01393     }
01394 
01395     // --- tree parsing info
01396     float64_t* margFactors=SG_MALLOC(float64_t, degree);
01397 
01398     int32_t* x = SG_MALLOC(int32_t,  degree+1 );
01399     int32_t* substrs = SG_MALLOC(int32_t,  degree+1 );
01400     // - fill arrays
01401     margFactors[0] = 1.0;
01402     substrs[0] = 0;
01403     for( k=1; k < degree; ++k ) {
01404         margFactors[k] = 0.25 * margFactors[k-1];
01405         substrs[k] = -1;
01406     }
01407     substrs[degree] = -1;
01408     // - fill struct
01409     struct TreeParseInfo info;
01410     info.num_sym = num_sym;
01411     info.num_feat = num_feat;
01412     info.p = -1;
01413     info.k = -1;
01414     info.nofsKmers = nofsKmers;
01415     info.margFactors = margFactors;
01416     info.x = x;
01417     info.substrs = substrs;
01418     info.y0 = 0;
01419     info.C_k = NULL;
01420     info.L_k = NULL;
01421     info.R_k = NULL;
01422 
01423     // === main loop
01424     i = 0; // total progress
01425     for( k = 0; k < max_degree; ++k )
01426     {
01427         const int32_t nofKmers = nofsKmers[ k ];
01428         info.C_k = C[k];
01429         info.L_k = L[k];
01430         info.R_k = R[k];
01431 
01432         // --- run over all trees
01433         for(int32_t p = 0; p < num_feat; ++p )
01434         {
01435             init_optimization( num_suppvec, IDX, alphas, p );
01436             int32_t tree = p ;
01437             for(int32_t j = 0; j < degree+1; j++ ) {
01438                 x[j] = -1;
01439             }
01440             tries.traverse( tree, p, info, 0, x, k );
01441             SG_PROGRESS(i++,0,num_feat*max_degree);
01442         }
01443 
01444         // --- add partial overlap scores
01445         if( k > 0 ) {
01446             const int32_t j = k - 1;
01447             const int32_t nofJmers = (int32_t) CMath::pow( num_sym, j+1 );
01448             for(int32_t p = 0; p < num_feat; ++p ) {
01449                 const int32_t offsetJ = nofJmers * p;
01450                 const int32_t offsetJ1 = nofJmers * (p+1);
01451                 const int32_t offsetK = nofKmers * p;
01452                 int32_t y;
01453                 int32_t sym;
01454                 for( y = 0; y < nofJmers; ++y ) {
01455                     for( sym = 0; sym < num_sym; ++sym ) {
01456                         const int32_t y_sym = num_sym*y + sym;
01457                         const int32_t sym_y = nofJmers*sym + y;
01458                         ASSERT(0<=y_sym && y_sym<nofKmers);
01459                         ASSERT(0<=sym_y && sym_y<nofKmers);
01460                         C[k][ y_sym + offsetK ] += L[j][ y + offsetJ ];
01461                         if( p < num_feat-1 ) {
01462                             C[k][ sym_y + offsetK ] += R[j][ y + offsetJ1 ];
01463                         }
01464                     }
01465                 }
01466             }
01467         }
01468         //   if( k > 1 )
01469         //     j = k-1
01470         //     for all positions p
01471         //       for all j-mers y
01472         //          for n in {A,C,G,T}
01473         //            C_k[ p, [y,n] ] += L_j[ p, y ]
01474         //            C_k[ p, [n,y] ] += R_j[ p+1, y ]
01475         //          end;
01476         //       end;
01477         //     end;
01478         //   end;
01479     }
01480 
01481     // === return a vector
01482     num_feat=1;
01483     num_sym = bigtabSize;
01484     // --- clean up
01485     SG_FREE(nofsKmers);
01486     SG_FREE(margFactors);
01487     SG_FREE(substrs);
01488     SG_FREE(x);
01489     SG_FREE(C);
01490     for( k = 0; k < max_degree; ++k ) {
01491         SG_FREE(L[k]);
01492         SG_FREE(R[k]);
01493     }
01494     SG_FREE(L);
01495     SG_FREE(R);
01496     return result;
01497 }
01498 
01499 char* CWeightedDegreePositionStringKernel::compute_consensus(
01500     int32_t &num_feat, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01501 {
01502     ASSERT(position_weights_lhs==NULL);
01503     ASSERT(position_weights_rhs==NULL);
01504     //only works for order <= 32
01505     ASSERT(degree<=32);
01506     ASSERT(!tries.get_use_compact_terminal_nodes());
01507     num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01508     ASSERT(num_feat>0);
01509     ASSERT(alphabet);
01510     ASSERT(alphabet->get_alphabet()==DNA || alphabet->get_alphabet()==RNA);
01511 
01512     //consensus
01513     char* result=SG_MALLOC(char, num_feat);
01514 
01515     //backtracking and scoring table
01516     int32_t num_tables=CMath::max(1,num_feat-degree+1);
01517     DynArray<ConsensusEntry>** table=SG_MALLOC(DynArray<ConsensusEntry>*, num_tables);
01518 
01519     for (int32_t i=0; i<num_tables; i++)
01520         table[i]=new DynArray<ConsensusEntry>(num_suppvec/10);
01521 
01522     //compute consensus via dynamic programming
01523     for (int32_t i=0; i<num_tables; i++)
01524     {
01525         bool cumulative=false;
01526 
01527         if (i<num_tables-1)
01528             init_optimization(num_suppvec, IDX, alphas, i);
01529         else
01530         {
01531             init_optimization(num_suppvec, IDX, alphas, i, num_feat-1);
01532             cumulative=true;
01533         }
01534 
01535         if (i==0)
01536             tries.fill_backtracking_table(i, NULL, table[i], cumulative, weights);
01537         else
01538             tries.fill_backtracking_table(i, table[i-1], table[i], cumulative, weights);
01539 
01540         SG_PROGRESS(i,0,num_feat);
01541     }
01542 
01543 
01544     //int32_t n=table[0]->get_num_elements();
01545 
01546     //for (int32_t i=0; i<n; i++)
01547     //{
01548     //  ConsensusEntry e= table[0]->get_element(i);
01549     //  SG_PRint32_t("first: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01550     //}
01551 
01552     //n=table[num_tables-1]->get_num_elements();
01553     //for (int32_t i=0; i<n; i++)
01554     //{
01555     //  ConsensusEntry e= table[num_tables-1]->get_element(i);
01556     //  SG_PRint32_t("last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01557     //}
01558     //n=table[num_tables-2]->get_num_elements();
01559     //for (int32_t i=0; i<n; i++)
01560     //{
01561     //  ConsensusEntry e= table[num_tables-2]->get_element(i);
01562     //  SG_PRINT("second last: str:0%0llx sc:%f bt:%d\n",e.string,e.score,e.bt);
01563     //}
01564 
01565     const char* acgt="ACGT";
01566 
01567     //backtracking start
01568     int32_t max_idx=-1;
01569     float32_t max_score=0;
01570     int32_t num_elements=table[num_tables-1]->get_num_elements();
01571 
01572     for (int32_t i=0; i<num_elements; i++)
01573     {
01574         float64_t sc=table[num_tables-1]->get_element(i).score;
01575         if (sc>max_score || max_idx==-1)
01576         {
01577             max_idx=i;
01578             max_score=sc;
01579         }
01580     }
01581     uint64_t endstr=table[num_tables-1]->get_element(max_idx).string;
01582 
01583     SG_INFO("max_idx:%d num_el:%d num_feat:%d num_tables:%d max_score:%f\n", max_idx, num_elements, num_feat, num_tables, max_score);
01584 
01585     for (int32_t i=0; i<degree; i++)
01586         result[num_feat-1-i]=acgt[(endstr >> (2*i)) & 3];
01587 
01588     if (num_tables>1)
01589     {
01590         for (int32_t i=num_tables-1; i>=0; i--)
01591         {
01592             //SG_PRINT("max_idx: %d, i:%d\n", max_idx, i);
01593             result[i]=acgt[table[i]->get_element(max_idx).string >> (2*(degree-1)) & 3];
01594             max_idx=table[i]->get_element(max_idx).bt;
01595         }
01596     }
01597 
01598     //for (int32_t t=0; t<num_tables; t++)
01599     //{
01600     //  n=table[t]->get_num_elements();
01601     //  for (int32_t i=0; i<n; i++)
01602     //  {
01603     //      ConsensusEntry e= table[t]->get_element(i);
01604     //      SG_PRINT("table[%d,%d]: str:0%0llx sc:%+f bt:%d\n",t,i, e.string,e.score,e.bt);
01605     //  }
01606     //}
01607 
01608     for (int32_t i=0; i<num_tables; i++)
01609         delete table[i];
01610 
01611     SG_FREE(table);
01612     return result;
01613 }
01614 
01615 
01616 float64_t* CWeightedDegreePositionStringKernel::extract_w(
01617     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01618     float64_t* w_result, int32_t num_suppvec, int32_t* IDX, float64_t* alphas)
01619 {
01620   delete_optimization();
01621   use_poim_tries=true;
01622   poim_tries.delete_trees(false);
01623 
01624   // === check
01625   ASSERT(position_weights_lhs==NULL);
01626   ASSERT(position_weights_rhs==NULL);
01627   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01628   ASSERT(num_feat>0);
01629   ASSERT(alphabet->get_alphabet()==DNA);
01630   ASSERT(max_degree>0);
01631 
01632   // === general variables
01633   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01634   const int32_t seqLen = num_feat;
01635   float64_t** subs;
01636   int32_t i;
01637   int32_t k;
01638   //int32_t y;
01639 
01640   // === init tables "subs" for substring scores / POIMs
01641   // --- compute table sizes
01642   int32_t* offsets;
01643   int32_t offset;
01644   offsets = SG_MALLOC(int32_t,  max_degree );
01645   offset = 0;
01646   for( k = 0; k < max_degree; ++k ) {
01647     offsets[k] = offset;
01648     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01649     const int32_t tabSize = nofsKmers * seqLen;
01650     offset += tabSize;
01651   }
01652   // --- allocate memory
01653   const int32_t bigTabSize = offset;
01654   w_result=SG_MALLOC(float64_t, bigTabSize);
01655   for (i=0; i<bigTabSize; ++i)
01656     w_result[i]=0;
01657 
01658   // --- set pointers for tables
01659   subs = SG_MALLOC(float64_t*,  max_degree );
01660   ASSERT( subs != NULL );
01661   for( k = 0; k < max_degree; ++k ) {
01662     subs[k] = &w_result[ offsets[k] ];
01663   }
01664   SG_FREE(offsets);
01665 
01666   // === init trees; extract "w"
01667   init_optimization( num_suppvec, IDX, alphas, -1);
01668   poim_tries.POIMs_extract_W( subs, max_degree );
01669 
01670   // === clean; return "subs" as vector
01671   SG_FREE(subs);
01672   num_feat = 1;
01673   num_sym = bigTabSize;
01674   use_poim_tries=false;
01675   poim_tries.delete_trees(false);
01676   return w_result;
01677 }
01678 
01679 float64_t* CWeightedDegreePositionStringKernel::compute_POIM(
01680     int32_t max_degree, int32_t& num_feat, int32_t& num_sym,
01681     float64_t* poim_result, int32_t num_suppvec, int32_t* IDX,
01682     float64_t* alphas, float64_t* distrib )
01683 {
01684   delete_optimization();
01685   use_poim_tries=true;
01686   poim_tries.delete_trees(false);
01687 
01688   // === check
01689   ASSERT(position_weights_lhs==NULL);
01690   ASSERT(position_weights_rhs==NULL);
01691   num_feat=((CStringFeatures<char>*) rhs)->get_max_vector_length();
01692   ASSERT(num_feat>0);
01693   ASSERT(alphabet->get_alphabet()==DNA);
01694   ASSERT(max_degree!=0);
01695   ASSERT(distrib);
01696 
01697   // === general variables
01698   static const int32_t NUM_SYMS = poim_tries.NUM_SYMS;
01699   const int32_t seqLen = num_feat;
01700   float64_t** subs;
01701   int32_t i;
01702   int32_t k;
01703 
01704   // === DEBUGGING mode
01705   //
01706   // Activated if "max_degree" < 0.
01707   // Allows to output selected partial score.
01708   //
01709   // |max_degree| mod 4
01710   //   0: substring
01711   //   1: superstring
01712   //   2: left overlap
01713   //   3: right overlap
01714   //
01715   const int32_t debug = ( max_degree < 0 ) ? ( abs(max_degree) % 4 + 1 ) : 0;
01716   if( debug ) {
01717     max_degree = abs(max_degree) / 4;
01718     switch( debug ) {
01719     case 1: {
01720       printf( "POIM DEBUGGING: substring only (max order=%d)\n", max_degree );
01721       break;
01722     }
01723     case 2: {
01724       printf( "POIM DEBUGGING: superstring only (max order=%d)\n", max_degree );
01725       break;
01726     }
01727     case 3: {
01728       printf( "POIM DEBUGGING: left overlap only (max order=%d)\n", max_degree );
01729       break;
01730     }
01731     case 4: {
01732       printf( "POIM DEBUGGING: right overlap only (max order=%d)\n", max_degree );
01733       break;
01734     }
01735     default: {
01736       printf( "POIM DEBUGGING: something is wrong (max order=%d)\n", max_degree );
01737       ASSERT(0);
01738       break;
01739     }
01740     }
01741   }
01742 
01743   // --- compute table sizes
01744   int32_t* offsets;
01745   int32_t offset;
01746   offsets = SG_MALLOC(int32_t,  max_degree );
01747   offset = 0;
01748   for( k = 0; k < max_degree; ++k ) {
01749     offsets[k] = offset;
01750     const int32_t nofsKmers = (int32_t) CMath::pow( NUM_SYMS, k+1 );
01751     const int32_t tabSize = nofsKmers * seqLen;
01752     offset += tabSize;
01753   }
01754   // --- allocate memory
01755   const int32_t bigTabSize=offset;
01756   poim_result=SG_MALLOC(float64_t, bigTabSize);
01757   for (i=0; i<bigTabSize; ++i )
01758     poim_result[i]=0;
01759 
01760   // --- set pointers for tables
01761   subs=SG_MALLOC(float64_t*, max_degree);
01762   for (k=0; k<max_degree; ++k)
01763     subs[k]=&poim_result[offsets[k]];
01764 
01765   SG_FREE(offsets);
01766 
01767   // === init trees; precalc S, L and R
01768   init_optimization( num_suppvec, IDX, alphas, -1);
01769   poim_tries.POIMs_precalc_SLR( distrib );
01770 
01771   // === compute substring scores
01772   if( debug==0 || debug==1 ) {
01773     poim_tries.POIMs_extract_W( subs, max_degree );
01774     for( k = 1; k < max_degree; ++k ) {
01775       const int32_t nofKmers2 = ( k > 1 ) ? (int32_t) CMath::pow(NUM_SYMS,k-1) : 0;
01776       const int32_t nofKmers1 = (int32_t) CMath::pow( NUM_SYMS, k );
01777       const int32_t nofKmers0 = nofKmers1 * NUM_SYMS;
01778       for( i = 0; i < seqLen; ++i ) {
01779     float64_t* const subs_k2i1 = ( k>1 && i<seqLen-1 ) ? &subs[k-2][(i+1)*nofKmers2] : NULL;
01780     float64_t* const subs_k1i1 = ( i < seqLen-1 ) ? &subs[k-1][(i+1)*nofKmers1] : NULL;
01781     float64_t* const subs_k1i0 = & subs[ k-1 ][ i*nofKmers1 ];
01782     float64_t* const subs_k0i  = & subs[ k-0 ][ i*nofKmers0 ];
01783     int32_t y0;
01784     for( y0 = 0; y0 < nofKmers0; ++y0 ) {
01785       const int32_t y1l = y0 / NUM_SYMS;
01786       const int32_t y1r = y0 % nofKmers1;
01787       const int32_t y2 = y1r / NUM_SYMS;
01788       subs_k0i[ y0 ] += subs_k1i0[ y1l ];
01789       if( i < seqLen-1 ) {
01790         subs_k0i[ y0 ] += subs_k1i1[ y1r ];
01791         if( k > 1 ) {
01792           subs_k0i[ y0 ] -= subs_k2i1[ y2 ];
01793         }
01794       }
01795     }
01796       }
01797     }
01798   }
01799 
01800   // === compute POIMs
01801   poim_tries.POIMs_add_SLR( subs, max_degree, debug );
01802 
01803   // === clean; return "subs" as vector
01804   SG_FREE(subs);
01805   num_feat = 1;
01806   num_sym = bigTabSize;
01807 
01808   use_poim_tries=false;
01809   poim_tries.delete_trees(false);
01810   
01811   return poim_result;
01812 }
01813 
01814 
01815 void CWeightedDegreePositionStringKernel::prepare_POIM2(
01816     float64_t* distrib, int32_t num_sym, int32_t num_feat)
01817 {
01818     SG_FREE(m_poim_distrib);
01819     m_poim_distrib=SG_MALLOC(float64_t, num_sym*num_feat);
01820 
01821     memcpy(m_poim_distrib, distrib, num_sym*num_feat*sizeof(float64_t));
01822     m_poim_num_sym=num_sym;
01823     m_poim_num_feat=num_feat;
01824 }
01825 
01826 void CWeightedDegreePositionStringKernel::compute_POIM2(
01827     int32_t max_degree, CSVM* svm)
01828 {
01829     ASSERT(svm);
01830     int32_t num_suppvec=svm->get_num_support_vectors();
01831     int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
01832     float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
01833 
01834     for (int32_t i=0; i<num_suppvec; i++)
01835     {
01836         sv_idx[i]=svm->get_support_vector(i);
01837         sv_weight[i]=svm->get_alpha(i);
01838     }
01839     
01840     if ((max_degree < 1) || (max_degree > 12))
01841     {
01842         //SG_WARNING( "max_degree out of range 1..12 (%d).\n", max_degree);
01843         SG_WARNING( "max_degree out of range 1..12 (%d). setting to 1.\n", max_degree);
01844         max_degree=1;
01845     }
01846     
01847     int32_t num_feat = m_poim_num_feat;
01848     int32_t num_sym = m_poim_num_sym;
01849     SG_FREE(m_poim);
01850 
01851     m_poim = compute_POIM(max_degree, num_feat, num_sym, NULL,  num_suppvec, sv_idx, 
01852                           sv_weight, m_poim_distrib);
01853 
01854     ASSERT(num_feat==1);
01855     m_poim_result_len=num_sym;
01856     
01857     SG_FREE(sv_weight);
01858     SG_FREE(sv_idx);
01859 }
01860 
01861 void CWeightedDegreePositionStringKernel::get_POIM2(
01862     float64_t** poim, int32_t* result_len)
01863 {
01864     *poim=SG_MALLOC(float64_t, m_poim_result_len);
01865     ASSERT(*poim);
01866     memcpy(*poim, m_poim, m_poim_result_len*sizeof(float64_t)) ;
01867     *result_len=m_poim_result_len ;
01868 }
01869 
01870 void CWeightedDegreePositionStringKernel::cleanup_POIM2()
01871 {
01872     SG_FREE(m_poim) ;
01873     m_poim=NULL ;
01874     SG_FREE(m_poim_distrib) ;
01875     m_poim_distrib=NULL ;
01876     m_poim_num_sym=0 ;
01877     m_poim_num_sym=0 ;
01878     m_poim_result_len=0 ;
01879 }
01880 
01881 void CWeightedDegreePositionStringKernel::load_serializable_post() throw (ShogunException)
01882 {
01883     CKernel::load_serializable_post();
01884 
01885     tries=CTrie<DNATrie>(degree);
01886     poim_tries=CTrie<POIMTrie>(degree);
01887 
01888     if (weights)
01889         init_block_weights();
01890 }
01891 
01892 void CWeightedDegreePositionStringKernel::init()
01893 {
01894     weights=NULL;
01895     position_weights=NULL;
01896     position_weights_len=0;
01897 
01898     position_weights_lhs=NULL;
01899     position_weights_lhs_len=0;
01900     position_weights_rhs=NULL;
01901     position_weights_rhs_len=0;
01902 
01903     weights_buffer=NULL;
01904     mkl_stepsize=1;
01905     degree=1;
01906     length=0;
01907 
01908     max_shift=0;
01909     max_mismatch=0;
01910     seq_length=0;
01911     shift=NULL;
01912     shift_len=0;
01913 
01914     block_weights=NULL;
01915     block_computation=true;
01916     type=E_EXTERNAL;
01917     which_degree=-1;
01918     tries=CTrie<DNATrie>(1);
01919     poim_tries=CTrie<POIMTrie>(1);
01920 
01921     tree_initialized=false;
01922     use_poim_tries=false;
01923     m_poim_distrib=NULL;
01924 
01925     m_poim=NULL;
01926     m_poim_num_sym=0;
01927     m_poim_num_feat=0;
01928     m_poim_result_len=0;
01929 
01930     alphabet=NULL;
01931 
01932     properties |= KP_LINADD | KP_KERNCOMBINATION | KP_BATCHEVALUATION;
01933 
01934     set_normalizer(new CSqrtDiagKernelNormalizer());
01935 
01936     m_parameters->add_matrix(&weights, &weights_degree, &weights_length,
01937             "weights", "WD Kernel weights.");
01938     m_parameters->add_vector(&position_weights, &position_weights_len,
01939             "position_weights",
01940             "Weights per position.");
01941     m_parameters->add_vector(&position_weights_lhs, &position_weights_lhs_len,
01942             "position_weights_lhs",
01943             "Weights per position left hand side.");
01944     m_parameters->add_vector(&position_weights_rhs, &position_weights_rhs_len,
01945             "position_weights_rhs",
01946             "Weights per position right hand side.");
01947     m_parameters->add_vector(&shift, &shift_len,
01948             "shift",
01949             "Shift Vector.");
01950     m_parameters->add(&mkl_stepsize, "mkl_stepsize", "MKL step size.");
01951     m_parameters->add(&degree, "degree", "Order of WD kernel.");
01952     m_parameters->add(&max_mismatch, "max_mismatch",
01953             "Number of allowed mismatches.");
01954     m_parameters->add(&block_computation, "block_computation",
01955             "If block computation shall be used.");
01956     m_parameters->add((machine_int_t*) &type, "type",
01957             "WeightedDegree kernel type.");
01958     m_parameters->add(&which_degree, "which_degree",
01959             "Unqueal -1 if just a single degree is selected.");
01960     m_parameters->add((CSGObject**) &alphabet, "alphabet",
01961             "Alphabet of Features.");
01962 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation