Trie.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg, Gunnar Raetsch
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/lib/common.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/lib/Trie.h>
00014 #include <shogun/mathematics/Math.h>
00015 
00016 namespace shogun
00017 {
00018 template <>
00019 void CTrie<POIMTrie>::POIMs_extract_W_helper(
00020     const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00021     const int32_t y0, float64_t* const* const W, const int32_t K )
00022 {
00023     ASSERT(nodeIdx!=NO_CHILD);
00024     ASSERT(depth<K);
00025     float64_t* const W_kiy = & W[ depth ][ offset + y0 ];
00026     POIMTrie* const node = &TreeMem[ nodeIdx ];
00027     int32_t sym;
00028 
00029     if( depth < degree-1 )
00030     {
00031         const int32_t offset1 = offset * NUM_SYMS;
00032         for( sym = 0; sym < NUM_SYMS; ++sym )
00033         {
00034             ASSERT(W_kiy[sym]==0);
00035             const int32_t childIdx = node->children[ sym ];
00036             if( childIdx != NO_CHILD )
00037             {
00038                 W_kiy[ sym ] = TreeMem[ childIdx ].weight;
00039 
00040                 if (depth < K-1)
00041                 {
00042                     const int32_t y1 = ( y0 + sym ) * NUM_SYMS;
00043                     POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K );
00044                 }
00045             }
00046         }
00047     }
00048     else
00049     {
00050         ASSERT(depth==degree-1);
00051         for( sym = 0; sym < NUM_SYMS; ++sym )
00052         {
00053             ASSERT(W_kiy[sym]==0);
00054             W_kiy[ sym ] = node->child_weights[ sym ];
00055         }
00056     }
00057 }
00058 
00059 template <>
00060 void CTrie<POIMTrie>::POIMs_extract_W(
00061     float64_t* const* const W, const int32_t K)
00062 {
00063   ASSERT(degree>=1);
00064   ASSERT(K>=1);
00065   const int32_t N = length;
00066   int32_t i;
00067   for( i = 0; i < N; ++i ) {
00068     //SG_PRINT( "W_helper( %d )\n", i );
00069     POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K );
00070   }
00071 }
00072 
00073 template <>
00074 void CTrie<POIMTrie>::POIMs_calc_SLR_helper1(
00075     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00076     int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym,
00077     float64_t* S, float64_t* L, float64_t* R )
00078 {
00079   ASSERT(depth==degree-1);
00080   ASSERT(nodeIdx!=NO_CHILD);
00081 
00082   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00083   POIMTrie* const node = &TreeMem[ nodeIdx ];
00084   int32_t symRight;
00085   int32_t symLeft;
00086 
00087   // --- init
00088   node->S = 0;
00089   node->L = 0;
00090   node->R = 0;
00091 
00092   if (i+depth<length)
00093   {
00094       const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00095 
00096       // --- go thru direct children
00097       for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00098           const float64_t w1 = node->child_weights[ symRight ];
00099           const float64_t pRight = distribRight[ symRight ];
00100           const float64_t incr1 = pRight * w1;
00101           node->S += incr1;
00102           node->R += incr1;
00103       }
00104   }
00105 
00106   // --- collect precalced values from left neighbor tree
00107   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00108   {
00109       if (left_tries_idx[symLeft] != NO_CHILD)
00110       {
00111           POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00112 
00113           ASSERT(nodeLeft);
00114           const float64_t w2 = nodeLeft->child_weights[ lastSym ];
00115           const float64_t pLeft = distribLeft[ symLeft ];
00116           const float64_t incr2 = pLeft * w2;
00117           node->S += incr2;
00118           node->L += incr2;
00119       }
00120   }
00121 
00122   // --- add w and return results
00123   const float64_t w0 = node->weight;
00124   node->S += w0;
00125   node->L += w0;
00126   node->R += w0;
00127   *S = node->S;
00128   *L = node->L;
00129   *R = node->R;
00130 }
00131 
00132 
00133 template <>
00134 void CTrie<POIMTrie>::POIMs_calc_SLR_helper2(
00135     const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
00136     int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L,
00137     float64_t* R )
00138 {
00139   ASSERT(0<=depth && depth<=degree-2);
00140   ASSERT(nodeIdx!=NO_CHILD);
00141 
00142   const float64_t* const distribLeft  = & distrib[ (i-1)     * NUM_SYMS ];
00143   POIMTrie* const node = &TreeMem[ nodeIdx ];
00144   float64_t dummy;
00145   float64_t auxS;
00146   float64_t auxR;
00147   int32_t symRight;
00148   int32_t symLeft;
00149 
00150   // --- init
00151   node->S = 0;
00152   node->L = 0;
00153   node->R = 0;
00154 
00155   // --- recurse thru direct children
00156   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00157   {
00158       const int32_t childIdx = node->children[ symRight ];
00159       if( childIdx != NO_CHILD )
00160       {
00161           if( depth < degree-2 ) 
00162           {
00163               int32_t new_left_tries_idx[4];
00164 
00165               for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00166               {
00167                   new_left_tries_idx[symLeft]=NO_CHILD;
00168 
00169                   if (left_tries_idx[symLeft] != NO_CHILD)
00170                   {
00171                       POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00172                       ASSERT(nodeLeft);
00173                       new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ];
00174                   }
00175               }
00176               POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR );
00177           }
00178           else 
00179               POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR );
00180 
00181           if (i+depth<length)
00182           {
00183               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00184               const float64_t pRight = distribRight[ symRight ];
00185               node->S += pRight * auxS;
00186               node->R += pRight * auxR;
00187           }
00188       }
00189   }
00190 
00191   // --- collect precalced values from left neighbor tree
00192   for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00193   {
00194       if (left_tries_idx[symLeft] != NO_CHILD)
00195       {
00196           const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
00197           ASSERT(nodeLeft);
00198           const float64_t pLeft = distribLeft[ symLeft ];
00199 
00200           node->S += pLeft * nodeLeft->S;
00201           node->L += pLeft * nodeLeft->L;
00202 
00203           if (i+depth<length)
00204           {
00205               const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
00206               // - second order correction for S
00207               auxS = 0;
00208               if( depth < degree-2 )
00209               {
00210                   for( symRight = 0; symRight < NUM_SYMS; ++symRight )
00211                   {
00212                       const int32_t childIdxLeft = nodeLeft->children[ symRight ];
00213                       if( childIdxLeft != NO_CHILD )
00214                       {
00215                           const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ];
00216                           auxS += distribRight[symRight] * childLeft->S;
00217                       }
00218                   }
00219               }
00220               else
00221               {
00222                   for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
00223                       auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ];
00224                   }
00225               }
00226               node->S -= pLeft* auxS;
00227           }
00228       }
00229   }
00230 
00231   // --- add w and return results
00232   const float64_t w0 = node->weight;
00233   //SG_PRINT( "  d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 );
00234   node->S += w0;
00235   node->L += w0;
00236   node->R += w0;
00237   *S = node->S;
00238   *L = node->L;
00239   *R = node->R;
00240 }
00241 
00242 
00243 
00244 template <>
00245 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib )
00246 {
00247     if( degree == 1 ) {
00248         return;
00249     }
00250 
00251     ASSERT(degree>=2);
00252     const int32_t N = length;
00253     float64_t dummy;
00254     int32_t symLeft;
00255     int32_t leftSubtrees[4];
00256     for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00257         leftSubtrees[ symLeft ] = NO_CHILD;
00258 
00259     for(int32_t i = 0; i < N; ++i )
00260     {
00261         POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy );
00262 
00263         const POIMTrie* const node = &TreeMem[ trees[i] ];
00264         ASSERT(trees[i]!=NO_CHILD);
00265 
00266         for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
00267             leftSubtrees[ symLeft ] = node->children[ symLeft ];
00268     }
00269 }
00270 
00271 template <>
00272 void CTrie<POIMTrie>::POIMs_get_SLR(
00273     const int32_t parentIdx, const int32_t sym, const int32_t depth,
00274     float64_t* S, float64_t* L, float64_t* R )
00275 {
00276   ASSERT(parentIdx!=NO_CHILD);
00277   const POIMTrie* const parent = &TreeMem[ parentIdx ];
00278   if( depth < degree ) {
00279     const int32_t nodeIdx = parent->children[ sym ];
00280     const POIMTrie* const node = &TreeMem[ nodeIdx ];
00281     *S = node->S;
00282     *L = node->L;
00283     *R = node->R;
00284   } else {
00285     ASSERT(depth==degree);
00286     const float64_t w = parent->child_weights[ sym ];
00287     *S = w;
00288     *L = w;
00289     *R = w;
00290   }
00291 }
00292 
00293 template <>
00294 void CTrie<POIMTrie>::POIMs_add_SLR_helper2(
00295     float64_t* const* const poims, const int32_t K, const int32_t k,
00296     const int32_t i, const int32_t y, const float64_t valW,
00297     const float64_t valS, const float64_t valL, const float64_t valR,
00298     const int32_t debug)
00299 {
00300     //SG_PRINT( "i=%d, d=%d, y=%d:  w=%.3f \n", i, k, y, valW );
00301     const int32_t nk = nofsKmers[ k ];
00302     ASSERT(1<=k && k<=K);
00303     ASSERT(0<=y && y<nk);
00304     int32_t z;
00305     int32_t j;
00306 
00307     // --- add superstring score; subtract "w", as it was counted twice
00308     if( debug==0 || debug==2 )
00309     {
00310         poims[ k-1 ][ i*nk + y ] += valS - valW;
00311     }
00312 
00313     // --- left partial overlaps
00314     if( debug==0 || debug==3 )
00315     {
00316         int32_t r;
00317         for( r = 1; k+r <= K; ++r )
00318         {
00319             const int32_t nr = nofsKmers[ r ];
00320             const int32_t nz = nofsKmers[ k+r ];
00321             float64_t* const poim = & poims[ k+r-1 ][ i*nz ];
00322             z = y * nr;
00323             for( j = 0; j < nr; ++j )
00324             {
00325                 if( !( 0 <= z && z < nz ) ) {
00326                     SG_PRINT( "k=%d, nk=%d,  r=%d, nr=%d,  nz=%d \n", k, nk, r, nr, nz );
00327                     SG_PRINT( "  j=%d, y=%d, z=%d \n", j, y, z );
00328                 }
00329                 ASSERT(0<=z && z<nz);
00330                 poim[ z ] += valL - valW;
00331                 ++z;
00332             }
00333         }
00334     }
00335     // --- right partial overlaps
00336     if( debug==0 || debug==4 )
00337     {
00338         int32_t l;
00339         for( l = 1; k+l <= K && l <= i; ++l )
00340         {
00341             const int32_t nl = nofsKmers[ l ];
00342             const int32_t nz = nofsKmers[ k+l ];
00343             float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ];
00344             z = y;
00345             for( j = 0; j < nl; ++j ) {
00346                 ASSERT(0<=z && z<nz);
00347                 poim[ z ] += valR - valW;
00348                 z += nk;
00349             }
00350         }
00351     }
00352 }
00353 
00354 template <>
00355 void CTrie<POIMTrie>::POIMs_add_SLR_helper1(
00356     const int32_t nodeIdx, const int32_t depth, const int32_t i,
00357     const int32_t y0, float64_t* const* const poims, const int32_t K,
00358     const int32_t debug)
00359 {
00360     ASSERT(nodeIdx!=NO_CHILD);
00361     ASSERT(depth<K);
00362     POIMTrie* const node = &TreeMem[ nodeIdx ];
00363     int32_t sym;
00364     if( depth < degree-1 )
00365     {
00366         if( depth < K-1 )
00367         {
00368             for( sym = 0; sym < NUM_SYMS; ++sym )
00369             {
00370                 const int32_t childIdx = node->children[ sym ];
00371                 if( childIdx != NO_CHILD )
00372                 {
00373                     POIMTrie* const child = &TreeMem[ childIdx ];
00374                     const int32_t y = y0 + sym;
00375                     const int32_t y1 = y * NUM_SYMS;
00376                     const float64_t w = child->weight;
00377                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00378                     POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug );
00379                 }
00380             }
00381         }
00382         else
00383         {
00384             ASSERT(depth==K-1);
00385             for( sym = 0; sym < NUM_SYMS; ++sym )
00386             {
00387                 const int32_t childIdx = node->children[ sym ];
00388                 if( childIdx != NO_CHILD ) {
00389                     POIMTrie* const child = &TreeMem[ childIdx ];
00390                     const int32_t y = y0 + sym;
00391                     const float64_t w = child->weight;
00392                     POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
00393                 }
00394             }
00395         }
00396     }
00397     else
00398     {
00399         ASSERT(depth==degree-1);
00400         for( sym = 0; sym < NUM_SYMS; ++sym )
00401         {
00402             const float64_t w = node->child_weights[ sym ];
00403             const int32_t y = y0 + sym;
00404             POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug );
00405         }
00406     }
00407 }
00408 
00409 
00410 
00411 template <>
00412 void CTrie<POIMTrie>::POIMs_add_SLR(
00413     float64_t* const* const poims, const int32_t K, const int32_t debug)
00414 {
00415   ASSERT(degree>=1);
00416   ASSERT(K>=1);
00417   {
00418     const int32_t m = ( degree > K ) ? degree : K;
00419     nofsKmers = SG_MALLOC(int32_t,  m+1 );
00420     int32_t n;
00421     int32_t k;
00422     n = 1;
00423     for( k = 0; k < m+1; ++k ) {
00424       nofsKmers[ k ] = n;
00425       n *= NUM_SYMS;
00426     }
00427   }
00428   const int32_t N = length;
00429   int32_t i;
00430   for( i = 0; i < N; ++i ) {
00431     POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug );
00432   }
00433   SG_FREE(nofsKmers);
00434 }
00435 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation