Trie.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2009 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/mathematics/Math.h>
00020 #include <shogun/base/SGObject.h>
00021 
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 
00026 // sentinel is 0xFFFFFFFC or float -2
00027 #define NO_CHILD ((int32_t)-1073741824)
00028 
00029 #define WEIGHTS_IN_TRIE 
00030 //#define TRIE_CHECK_EVERYTHING
00031 
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x) 
00036 #endif
00037 
00038 //#define TRIE_ASSERT(x) ASSERT(x)
00039 #define TRIE_ASSERT(x) 
00040 
00041 #define TRIE_TERMINAL_CHARACTER  7
00042 
00044 struct ConsensusEntry
00045 {
00047     uint64_t string;
00049     float32_t score;
00051     int32_t bt;
00052 };
00053 
00055 struct POIMTrie
00056 {
00058     float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060 
00061     bool has_seq;
00063     bool has_floats;
00064 #endif      
00065     union
00066     {
00068         float32_t child_weights[4];
00070         int32_t children[4];
00072         uint8_t seq[16] ;
00073     }; 
00074 
00076     float64_t S;
00078     float64_t L;
00080     float64_t R;
00081 };
00082 
00084 struct DNATrie
00085 {
00087     float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089 
00090     bool has_seq;
00092     bool has_floats;
00093 #endif
00094     union
00095     {
00097         float32_t child_weights[4];
00099         int32_t children[4];
00101         uint8_t seq[16] ;
00102     }; 
00103 };
00104 
00106 struct TreeParseInfo {
00108     int32_t num_sym;
00110     int32_t num_feat;
00112     int32_t p;
00114     int32_t k;
00116     int32_t* nofsKmers;
00118     float64_t* margFactors;
00120     int32_t* x;
00122     int32_t* substrs;
00124     int32_t y0;
00126     float64_t* C_k;
00128     float64_t* L_k;
00130     float64_t* R_k;
00131 };
00132 
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134 
00135 template <class Trie> class CTrie;
00136 
00137 #define IGNORE_IN_CLASSLIST
00138 
00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject
00157 {
00158     public:
00160         CTrie();
00161 
00168         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00169 
00171         CTrie(const CTrie & to_copy);
00172         virtual ~CTrie();
00173 
00175         const CTrie & operator=(const CTrie & to_copy);
00176 
00184         bool compare_traverse(
00185             int32_t node, const CTrie & other, int32_t other_node);
00186 
00192         bool compare(const CTrie & other);
00193 
00200         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00201 
00208         int32_t find_deepest_node(
00209             int32_t start_node, int32_t &deepest_node) const;
00210 
00215         void display_node(int32_t node) const;
00216 
00218         void destroy();
00219 
00224         void set_degree(int32_t d);
00225 
00232         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00233 
00239         void delete_trees(bool p_use_compact_terminal_nodes=true);
00240 
00251         void add_to_trie(
00252             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00253             float64_t *weights, bool degree_times_position_weights);
00254 
00261         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00262 
00268         float64_t* compute_abs_weights(int32_t &len);
00269 
00282         float64_t compute_by_tree_helper(
00283             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00284             int32_t weight_pos, float64_t * weights,
00285             bool degree_times_position_weights) ;
00286 
00301         void compute_by_tree_helper(
00302             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00303             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00304             int32_t mkl_stepsize, float64_t * weights,
00305             bool degree_times_position_weights);
00306 
00321         void compute_scoring_helper(
00322             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00323             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00324             int32_t sym_offset, int32_t offs, float64_t* result);
00325 
00338         void add_example_to_tree_mismatch_recursion(
00339             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00340             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00341             int32_t max_mismatch, float64_t * weights);
00342 
00352         void traverse(
00353             int32_t tree, const int32_t p, struct TreeParseInfo info,
00354             const int32_t depth, int32_t* const x, const int32_t k);
00355 
00365         void count(
00366             const float64_t w, const int32_t depth,
00367             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00368             const int32_t k);
00369 
00376         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00377 
00386         float64_t get_cumulative_score(
00387             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00388 
00398         void fill_backtracking_table_recursion(
00399             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00400             DynArray<ConsensusEntry>* table, float64_t* weights);
00401 
00410         void fill_backtracking_table(
00411             int32_t pos, DynArray<ConsensusEntry>* prev,
00412             DynArray<ConsensusEntry>* cur, bool cumulative,
00413             float64_t* weights);
00414 
00420         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00421 
00426         void POIMs_precalc_SLR(const float64_t* const distrib);
00427 
00437         void POIMs_get_SLR(
00438             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00439             float64_t* S, float64_t* L, float64_t* R);
00440 
00447         void POIMs_add_SLR(
00448             float64_t* const* const poims, const int32_t K,
00449             const int32_t debug);
00450 
00455         inline bool get_use_compact_terminal_nodes()
00456         {
00457             return use_compact_terminal_nodes ;
00458         }
00459 
00465         inline void set_use_compact_terminal_nodes(
00466             bool p_use_compact_terminal_nodes)
00467         {
00468             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00469         }
00470 
00475         inline int32_t get_num_used_nodes()
00476         {
00477             return TreeMemPtr;
00478         }
00479 
00484         inline void set_position_weights(float64_t* p_position_weights)
00485         {
00486             position_weights=p_position_weights;
00487         }
00488 
00493         inline int32_t get_node(bool last_node=false)
00494         {
00495             int32_t ret = TreeMemPtr++;
00496             check_treemem() ;
00497 
00498             if (last_node)
00499             {
00500                 for (int32_t q=0; q<4; q++)
00501                     TreeMem[ret].child_weights[q]=0.0;
00502             }
00503             else
00504             {
00505                 for (int32_t q=0; q<4; q++)
00506                     TreeMem[ret].children[q]=NO_CHILD;
00507             }
00508 #ifdef TRIE_CHECK_EVERYTHING
00509             TreeMem[ret].has_seq=false ;
00510             TreeMem[ret].has_floats=false ;
00511 #endif
00512             TreeMem[ret].weight=0.0;
00513             return ret ;
00514         }
00515 
00517         inline void check_treemem()
00518         {
00519             if (TreeMemPtr+10 < TreeMemPtrMax)
00520                 return;
00521             SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00522                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00523             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00524             TreeMem = SG_REALLOC(Trie, TreeMem, TreeMemPtrMax);
00525         }
00526 
00531         inline void set_weights_in_tree(bool weights_in_tree_)
00532         {
00533             weights_in_tree = weights_in_tree_;
00534         }
00535 
00540         inline bool get_weights_in_tree()
00541         {
00542             return weights_in_tree;
00543         }
00544 
00554         void POIMs_extract_W_helper(
00555             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00556             const int32_t y0, float64_t* const* const W, const int32_t K);
00557 
00570         void POIMs_calc_SLR_helper1(
00571             const float64_t* const distrib, const int32_t i,
00572             const int32_t nodeIdx, int32_t left_tries_idx[4],
00573             const int32_t depth, int32_t const lastSym, float64_t* S,
00574             float64_t* L, float64_t* R);
00575 
00576 
00587         void POIMs_calc_SLR_helper2(
00588             const float64_t* const distrib, const int32_t i,
00589             const int32_t nodeIdx, int32_t left_tries_idx[4],
00590             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00591 
00602         void POIMs_add_SLR_helper1(
00603             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00604             const int32_t y0, float64_t* const* const poims, const int32_t K,
00605             const int32_t debug);
00606 
00620         void POIMs_add_SLR_helper2(
00621             float64_t* const* const poims, const int32_t K, const int32_t k,
00622             const int32_t i, const int32_t y, const float64_t valW,
00623             const float64_t valS, const float64_t valL, const float64_t valR,
00624             const int32_t debug);
00625 
00627         inline virtual const char* get_name() const { return "Trie"; }
00628 
00629     public:
00631         int32_t NUM_SYMS;
00632 
00633     protected:
00635         int32_t length;
00637         int32_t * trees;
00638 
00640         int32_t degree;
00642         float64_t*  position_weights;
00643 
00645         Trie* TreeMem;
00647         int32_t TreeMemPtr;
00649         int32_t TreeMemPtrMax;
00651         bool use_compact_terminal_nodes;
00652 
00654         bool weights_in_tree;
00655 
00657         int32_t* nofsKmers;
00658 };
00659     template <class Trie>
00660     CTrie<Trie>::CTrie()
00661     : CSGObject(), degree(0), position_weights(NULL),
00662         use_compact_terminal_nodes(false),
00663         weights_in_tree(true)
00664     {
00665 
00666         TreeMemPtrMax=0;
00667         TreeMemPtr=0;
00668         TreeMem=NULL;
00669 
00670         length=0;
00671         trees=NULL;
00672 
00673         NUM_SYMS=4;
00674     }
00675 
00676     template <class Trie>
00677     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00678     : CSGObject(), degree(d), position_weights(NULL),
00679         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00680         weights_in_tree(true)
00681     {
00682         TreeMemPtrMax=1024*1024/sizeof(Trie);
00683         TreeMemPtr=0;
00684         TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00685 
00686         length=0;
00687         trees=NULL;
00688 
00689         NUM_SYMS=4;
00690     }
00691 
00692     template <class Trie>
00693     CTrie<Trie>::CTrie(const CTrie & to_copy)
00694     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00695         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00696     {
00697         if (to_copy.position_weights!=NULL)
00698         {
00699             position_weights = to_copy.position_weights;
00700             /*SG_MALLOC(float64_t, to_copy.length);
00701               for (int32_t i=0; i<to_copy.length; i++)
00702               position_weights[i]=to_copy.position_weights[i]; */
00703         }
00704         else
00705             position_weights=NULL;
00706 
00707         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00708         TreeMemPtr=to_copy.TreeMemPtr;
00709         TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00710         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00711 
00712         length=to_copy.length;
00713         trees=SG_MALLOC(int32_t, length);
00714         for (int32_t i=0; i<length; i++)
00715             trees[i]=to_copy.trees[i];
00716 
00717         NUM_SYMS=4;
00718     }
00719 
00720     template <class Trie>
00721 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00722 {
00723     degree=to_copy.degree ;
00724     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00725 
00726     SG_FREE(position_weights);
00727     position_weights=NULL ;
00728     if (to_copy.position_weights!=NULL)
00729     {
00730         position_weights=to_copy.position_weights ;
00731         /*position_weights = SG_MALLOC(float64_t, to_copy.length);
00732           for (int32_t i=0; i<to_copy.length; i++)
00733           position_weights[i]=to_copy.position_weights[i] ;*/
00734     }
00735     else
00736         position_weights=NULL ;
00737 
00738     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00739     TreeMemPtr=to_copy.TreeMemPtr ;
00740     SG_FREE(TreeMem) ;
00741     TreeMem = SG_MALLOC(Trie, TreeMemPtrMax);
00742     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00743 
00744     length = to_copy.length ;
00745     if (trees)
00746         SG_FREE(trees);
00747     trees=SG_MALLOC(int32_t, length);       
00748     for (int32_t i=0; i<length; i++)
00749         trees[i]=to_copy.trees[i] ;
00750 
00751     return *this ;
00752 }
00753 
00754 template <class Trie>
00755 int32_t CTrie<Trie>::find_deepest_node(
00756     int32_t start_node, int32_t& deepest_node) const
00757 {
00758 #ifdef TRIE_CHECK_EVERYTHING
00759     int32_t ret=0 ;
00760     SG_DEBUG("start_node=%i\n", start_node) ;
00761 
00762     if (start_node==NO_CHILD) 
00763     {
00764         for (int32_t i=0; i<length; i++)
00765         {
00766             int32_t my_deepest_node ;
00767             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00768             SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00769             if (depth>ret)
00770             {
00771                 deepest_node=my_deepest_node ;
00772                 ret=depth ;
00773             }
00774         }
00775         return ret ;
00776     }
00777 
00778     if (TreeMem[start_node].has_seq)
00779     {
00780         for (int32_t q=0; q<16; q++)
00781             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00782                 ret++ ;
00783         deepest_node=start_node ;
00784         return ret ;
00785     }
00786     if (TreeMem[start_node].has_floats)
00787     {
00788         deepest_node=start_node ;
00789         return 1 ;
00790     }
00791 
00792     for (int32_t q=0; q<4; q++)
00793     {
00794         int32_t my_deepest_node ;
00795         if (TreeMem[start_node].children[q]==NO_CHILD)
00796             continue ;
00797         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00798         if (depth>ret)
00799         {
00800             deepest_node=my_deepest_node ;
00801             ret=depth ;
00802         }
00803     }
00804     return ret ;
00805 #else
00806     SG_ERROR( "not implemented\n") ;
00807     return 0 ;
00808 #endif
00809 }
00810 
00811     template <class Trie>
00812 int32_t CTrie<Trie>::compact_nodes(
00813     int32_t start_node, int32_t depth, float64_t * weights) 
00814 {
00815     SG_ERROR( "code buggy\n") ;
00816 
00817     int32_t ret=0 ;
00818 
00819     if (start_node==NO_CHILD) 
00820     {
00821         for (int32_t i=0; i<length; i++)
00822             compact_nodes(i,1, weights) ;
00823         return 0 ;
00824     }
00825     if (start_node<0)
00826         return -1 ;
00827 
00828     if (depth==degree-1)
00829     {
00830         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00831         int32_t num_used=0 ;
00832         for (int32_t q=0; q<4; q++)
00833             if (TreeMem[start_node].child_weights[q]!=0.0)
00834                 num_used++ ;
00835         if (num_used>1)
00836             return -1 ;
00837         return 1 ;
00838     }
00839     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00840 
00841     int32_t num_used = 0 ;
00842     int32_t q_used=-1 ;
00843 
00844     for (int32_t q=0; q<4; q++)
00845     {
00846         if (TreeMem[start_node].children[q]==NO_CHILD)
00847             continue ;
00848         num_used++ ;
00849         q_used=q ;
00850     }
00851     if (num_used>1)
00852     {
00853         if (depth>=degree-2)
00854             return -1 ;
00855         for (int32_t q=0; q<4; q++)
00856         {
00857             if (TreeMem[start_node].children[q]==NO_CHILD)
00858                 continue ;
00859             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00860             if (num<=2)
00861                 continue ;
00862             int32_t node=get_node() ;
00863 
00864             int32_t last_node=TreeMem[start_node].children[q] ;
00865             if (weights_in_tree)
00866             {
00867                 ASSERT(weights[depth]!=0.0) ;
00868                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00869             }
00870             else
00871                 TreeMem[node].weight=TreeMem[last_node].weight ;
00872 
00873 #ifdef TRIE_CHECK_EVERYTHING
00874             TreeMem[node].has_seq=true ;
00875 #endif
00876             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00877             for (int32_t n=0; n<num; n++)
00878             {
00879                 ASSERT(depth+n+1<=degree-1) ;
00880                 ASSERT(last_node!=NO_CHILD) ;
00881                 if (depth+n+1==degree-1)
00882                 {
00883                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00884                     int32_t  k ;
00885                     for (k=0; k<4; k++)
00886                         if (TreeMem[last_node].child_weights[k]!=0.0)
00887                             break ;
00888                     if (k==4)
00889                         break ;
00890                     TreeMem[node].seq[n]=k ;
00891                     break ;
00892                 }
00893                 else
00894                 {
00895                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00896                     int32_t k ;
00897                     for (k=0; k<4; k++)
00898                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00899                             break ;
00900                     if (k==4)
00901                         break ;
00902                     TreeMem[node].seq[n]=k ;
00903                     last_node=TreeMem[last_node].children[k] ;
00904                 }
00905             }
00906             TreeMem[start_node].children[q]=-node ;
00907         }
00908         return -1 ;
00909     }
00910     if (num_used==0)
00911         return 0 ;
00912 
00913     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00914     if (ret<0)
00915         return ret ;
00916     return ret+1 ;
00917 }
00918 
00919 
00920     template <class Trie>
00921 bool CTrie<Trie>::compare_traverse(
00922     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00923 {
00924     SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00925     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00926     {
00927         SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00928         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00929         display_node(node) ;
00930         SG_DEBUG( "============================================================\n") ;           
00931         other.display_node(other_node) ;
00932         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00933         return false ;
00934     }
00935 
00936 #ifdef TRIE_CHECK_EVERYTHING
00937     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00938     {
00939         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00940         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00941         display_node(node) ;
00942         SG_DEBUG( "============================================================\n") ;           
00943         other.display_node(other_node) ;
00944         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00945         return false ;
00946     }
00947     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00948     {
00949         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00950         return false ;
00951     }
00952     if (other.TreeMem[other_node].has_floats)
00953     {
00954         for (int32_t q=0; q<4; q++)
00955             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00956             {
00957                 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00958                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00959                 display_node(node) ;
00960                 SG_DEBUG( "============================================================\n") ;           
00961                 other.display_node(other_node) ;
00962                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00963                 return false ;
00964             }
00965     }
00966     if (other.TreeMem[other_node].has_seq)
00967     {
00968         for (int32_t q=0; q<16; q++)
00969             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00970             {
00971                 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00972                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00973                 display_node(node) ;
00974                 SG_DEBUG( "============================================================\n") ;           
00975                 other.display_node(other_node) ;
00976                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00977                 return false ;
00978             }
00979     }
00980     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00981     {
00982         for (int32_t q=0; q<4; q++)
00983         {
00984             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00985                 continue ;
00986             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00987             {
00988                 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00989                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00990                 display_node(node) ;
00991                 SG_DEBUG( "============================================================\n") ;           
00992                 other.display_node(other_node) ;
00993                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00994                 return false ;
00995             }
00996             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00997                 return false ;
00998         }
00999     }
01000 #else
01001     SG_ERROR( "not implemented\n") ;
01002 #endif
01003 
01004     return true ;
01005 }
01006 
01007     template <class Trie>
01008 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
01009 {
01010     bool ret=true ;
01011     for (int32_t i=0; i<length; i++)
01012         if (!compare_traverse(trees[i], other, other.trees[i]))
01013             return false ;
01014         else
01015             SG_DEBUG("two tries at %i identical\n", i) ;
01016 
01017     return ret ;
01018 }
01019 
01020 template <class Trie>
01021 bool CTrie<Trie>::find_node(
01022     int32_t node, int32_t * trace, int32_t& trace_len) const 
01023 {
01024 #ifdef TRIE_CHECK_EVERYTHING
01025     ASSERT(trace_len-1>=0) ;
01026     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01027         if (TreeMem[trace[trace_len-1]].has_seq)
01028             return false ;
01029     if (TreeMem[trace[trace_len-1]].has_floats)
01030         return false ;
01031 
01032     for (int32_t q=0; q<4; q++)
01033     {
01034         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01035             continue ;
01036         int32_t tl=trace_len+1 ;
01037         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01038             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01039         else
01040             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01041 
01042         if (trace[trace_len]==node)
01043         {
01044             trace_len=tl ;
01045             return true ;
01046         }
01047         if (find_node(node, trace, tl))
01048         {
01049             trace_len=tl ;
01050             return true ;
01051         }
01052     }
01053     trace_len=0 ;
01054     return false ;
01055 #else
01056     SG_ERROR( "not implemented\n") ;
01057     return false ;
01058 #endif
01059 }
01060 
01061 template <class Trie>
01062 void CTrie<Trie>::display_node(int32_t node) const
01063 {
01064 #ifdef TRIE_CHECK_EVERYTHING
01065     int32_t * trace=SG_MALLOC(int32_t, 2*degree);
01066     int32_t trace_len=-1 ;
01067     bool found = false ;
01068     int32_t tree=-1 ;
01069     for (tree=0; tree<length; tree++)
01070     {
01071         trace[0]=trees[tree] ;
01072         trace_len=1 ;
01073         found=find_node(node, trace, trace_len) ;
01074         if (found)
01075             break ;
01076     }
01077     ASSERT(found) ;
01078     SG_PRINT( "position %i  trace: ", tree) ;
01079 
01080     for (int32_t i=0; i<trace_len-1; i++)
01081     {
01082         int32_t branch=-1 ;
01083         for (int32_t q=0; q<4; q++)
01084             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01085             {
01086                 branch=q;
01087                 break ;
01088             }
01089         ASSERT(branch!=-1) ;
01090         char acgt[5]="ACGT" ;
01091         SG_PRINT( "%c", acgt[branch]) ;
01092     }
01093     SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01094     if (TreeMem[node].has_floats)
01095     {
01096         for (int32_t q=0; q<4; q++)
01097             SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01098     }
01099     if (TreeMem[node].has_seq)
01100     {
01101         for (int32_t q=0; q<16; q++)
01102             SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01103     }
01104     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01105     {
01106         for (int32_t q=0; q<4; q++)
01107         {
01108             if (TreeMem[node].children[q]!=NO_CHILD)
01109             {
01110                 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01111                 display_node(abs(TreeMem[node].children[q])) ;
01112             }
01113             else
01114                 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01115         }
01116 
01117     }
01118 
01119     SG_FREE(trace);
01120 #else
01121     SG_ERROR( "not implemented\n") ;
01122 #endif
01123 }
01124 
01125 
01126 template <class Trie> CTrie<Trie>::~CTrie()
01127 {
01128     destroy() ;
01129 
01130     SG_FREE(TreeMem) ;
01131 }
01132 
01133 template <class Trie> void CTrie<Trie>::destroy()
01134 {
01135     if (trees!=NULL)
01136     {
01137         delete_trees();
01138         for (int32_t i=0; i<length; i++)
01139             trees[i] = NO_CHILD;
01140         SG_FREE(trees);
01141 
01142         TreeMemPtr=0;
01143         length=0;
01144         trees=NULL;
01145     }
01146 }
01147 
01148 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01149 {
01150     delete_trees(get_use_compact_terminal_nodes());
01151     degree=d;
01152 }
01153 
01154 template <class Trie> void CTrie<Trie>::create(
01155     int32_t len, bool p_use_compact_terminal_nodes)
01156 {
01157     destroy();
01158 
01159     trees=SG_MALLOC(int32_t, len);      
01160     TreeMemPtr=0 ;
01161     for (int32_t i=0; i<len; i++)
01162         trees[i]=get_node(degree==1);
01163     length = len ;
01164 
01165     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01166 }
01167 
01168 
01169 template <class Trie> void CTrie<Trie>::delete_trees(
01170     bool p_use_compact_terminal_nodes)
01171 {
01172     if (trees==NULL)
01173         return;
01174 
01175     TreeMemPtr=0 ;
01176     for (int32_t i=0; i<length; i++)
01177         trees[i]=get_node(degree==1);
01178 
01179     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01180 } 
01181 
01182     template <class Trie>
01183 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01184 {
01185     float64_t ret=0 ;
01186 
01187     if (tree==NO_CHILD)
01188         return 0 ;
01189     TRIE_ASSERT(tree>=0) ;
01190 
01191     if (depth==degree-2)
01192     {
01193         ret+=(TreeMem[tree].weight) ;
01194 
01195         for (int32_t k=0; k<4; k++)
01196             ret+=(TreeMem[tree].child_weights[k]) ;
01197 
01198         return ret ;
01199     }
01200 
01201     ret+=(TreeMem[tree].weight) ;
01202 
01203     for (int32_t i=0; i<4; i++)
01204         if (TreeMem[tree].children[i]!=NO_CHILD)
01205             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01206 
01207     return ret ;
01208 }
01209 
01210 
01211     template <class Trie>
01212 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01213 {
01214     float64_t * sum=SG_MALLOC(float64_t, length*4);
01215     for (int32_t i=0; i<length*4; i++)
01216         sum[i]=0 ;
01217     len=length ;
01218 
01219     for (int32_t i=0; i<length; i++)
01220     {
01221         TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01222         for (int32_t k=0; k<4; k++)
01223         {
01224             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01225         }
01226     }
01227 
01228     return sum ;
01229 }
01230 
01231     template <class Trie>
01232 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01233     int32_t tree,  int32_t i, float64_t alpha,
01234         int32_t *vec, int32_t len_rem, 
01235         int32_t degree_rec, int32_t mismatch_rec, 
01236         int32_t max_mismatch, float64_t * weights) 
01237 {
01238     if (tree==NO_CHILD)
01239         tree=trees[i] ;
01240     TRIE_ASSERT(tree!=NO_CHILD) ;
01241 
01242     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01243         return ;
01244     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01245 
01246     int32_t subtree = NO_CHILD ;
01247 
01248     if (degree_rec==degree-1)
01249     {
01250         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01251         if (weights_in_tree)
01252             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01253         else
01254             if (weights[degree_rec]!=0.0)
01255                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01256         if (mismatch_rec+1<=max_mismatch)
01257             for (int32_t o=0; o<3; o++)
01258             {
01259                 if (weights_in_tree)
01260                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01261                 else
01262                     if (weights[degree_rec]!=0.0)
01263                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01264             }
01265         return ;
01266     }
01267     else
01268     {
01269         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01270         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01271         {
01272             subtree=TreeMem[tree].children[vec[0]] ;
01273             if (weights_in_tree)
01274                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01275             else
01276                 if (weights[degree_rec]!=0.0)
01277                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01278         }
01279         else 
01280         {
01281             int32_t tmp = get_node(degree_rec==degree-2);
01282             ASSERT(tmp>=0) ;
01283             TreeMem[tree].children[vec[0]]=tmp ;
01284             subtree=tmp ;
01285 #ifdef TRIE_CHECK_EVERYTHING
01286             if (degree_rec==degree-2)
01287                 TreeMem[subtree].has_floats=true ;
01288 #endif
01289             if (weights_in_tree)
01290                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01291             else
01292                 if (weights[degree_rec]!=0.0)
01293                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01294                 else
01295                     TreeMem[subtree].weight = 0.0 ;
01296         }
01297         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01298                 &vec[1], len_rem-1, 
01299                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01300 
01301         if (mismatch_rec+1<=max_mismatch)
01302         {
01303             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01304             for (int32_t o=0; o<3; o++)
01305             {
01306                 int32_t ot = other[vec[0]][o] ;
01307                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01308                 {
01309                     subtree=TreeMem[tree].children[ot] ;
01310                     if (weights_in_tree)
01311                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01312                     else
01313                         if (weights[degree_rec]!=0.0)
01314                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01315                 }
01316                 else 
01317                 {
01318                     int32_t tmp = get_node(degree_rec==degree-2);
01319                     ASSERT(tmp>=0) ;
01320                     TreeMem[tree].children[ot]=tmp ;
01321                     subtree=tmp ;
01322 #ifdef TRIE_CHECK_EVERYTHING
01323                     if (degree_rec==degree-2)
01324                         TreeMem[subtree].has_floats=true ;
01325 #endif
01326 
01327                     if (weights_in_tree)
01328                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01329                     else
01330                         if (weights[degree_rec]!=0.0)
01331                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01332                         else
01333                             TreeMem[subtree].weight = 0.0 ;
01334                 }
01335 
01336                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01337                         &vec[1], len_rem-1, 
01338                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01339             }
01340         }
01341     }
01342 }
01343 
01344     template <class Trie>
01345 void CTrie<Trie>::compute_scoring_helper(
01346     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01347     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01348     int32_t offs, float64_t* result)
01349 {
01350     if (i+j<num_feat)
01351     {
01352         float64_t decay=1.0; //no decay by default
01353         //if (j>d)
01354         //  decay=pow(0.5,j); //marginalize out lower order matches
01355 
01356         if (j<degree-1)
01357         {
01358             for (int32_t k=0; k<num_sym; k++)
01359             {
01360                 if (TreeMem[tree].children[k]!=NO_CHILD)
01361                 {
01362                     int32_t child=TreeMem[tree].children[k];
01363                     //continue recursion if not yet at max_degree, else add to result
01364                     if (d<max_degree-1)
01365                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01366                     else
01367                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01368 
01370                     if (d==0)
01371                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01372                 }
01373             }
01374         }
01375         else if (j==degree-1)
01376         {
01377             for (int32_t k=0; k<num_sym; k++)
01378             {
01379                 //continue recursion if not yet at max_degree, else add to result
01380                 if (d<max_degree-1 && i<num_feat-1)
01381                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01382                 else
01383                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01384             }
01385         }
01386     }
01387 }
01388 
01389     template <class Trie>
01390 void CTrie<Trie>::traverse(
01391     int32_t tree, const int32_t p, struct TreeParseInfo info,
01392     const int32_t depth, int32_t* const x, const int32_t k)
01393 {
01394     const int32_t num_sym = info.num_sym;
01395     const int32_t y0 = info.y0;
01396     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01397     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01398     //if( !( info.y0 == temp ) ) {
01399     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01400     //}
01401     //ASSERT( info.y0 == temp );
01402     int32_t sym;
01403     ASSERT( depth < degree );
01404     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] );
01405     if (depth<degree-1)
01406     {
01407         for( sym=0; sym<num_sym; ++sym ) {
01408             const int32_t childNum = TreeMem[tree].children[ sym ];
01409             if( childNum != NO_CHILD ) {
01410                 int32_t child = childNum ;
01411                 x[depth] = sym;
01412                 info.substrs[depth+1] = y0 + sym;
01413                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01414                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01415                 count( TreeMem[child].weight, depth, info, p, x, k );
01416                 traverse( child, p, info, depth+1, x, k );
01417                 x[depth] = -1;
01418             }
01419         }
01420     }
01421     else if( depth == degree-1 )
01422     {
01423         for( sym=0; sym<num_sym; ++sym ) {
01424             const float64_t w = TreeMem[tree].child_weights[ sym ];
01425             if( w != 0.0 ) {
01426                 x[depth] = sym;
01427                 info.substrs[depth+1] = y0 + sym;
01428                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01429                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01430                 count( w, depth, info, p, x, k );
01431                 x[depth] = -1;
01432             }
01433         }
01434     }
01435     //info.substrs[depth+1] = -1;
01436     //info.y0 = temp;
01437 }
01438 
01439     template <class Trie>
01440 void CTrie<Trie>::count(
01441     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01442     const int32_t p, int32_t* x, const int32_t k)
01443 {
01444     ASSERT( fabs(w) < 1e10 );
01445     ASSERT( x[depth] >= 0 );
01446     ASSERT( x[depth+1] < 0 );
01447     if ( depth < k ) {
01448         return;
01449     }
01450     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) );
01451     const int32_t nofKmers = info.nofsKmers[k];
01452     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01453     const int32_t m_a = depth - k + 1;
01454     const int32_t m_b = info.num_feat - p;
01455     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01456     // all proper k-substrings
01457     const int32_t offset0 = nofKmers * p;
01458     register int32_t i;
01459     register int32_t offset;
01460     offset = offset0;
01461     for( i = 0; i < m; ++i ) {
01462         const int32_t y = info.substrs[i+k+1];
01463         info.C_k[ y + offset ] += margWeight;
01464         offset += nofKmers;
01465     }
01466     if( depth > k ) {
01467         // k-prefix
01468         const int32_t offsR = info.substrs[k+1] + offset0;
01469         info.R_k[offsR] += margWeight;
01470         // k-suffix
01471         if( p+depth-k < info.num_feat ) {
01472             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01473             info.L_k[offsL] += margWeight; 
01474         }
01475     }
01476     //    # N.x = substring represented by N
01477     //    # N.d = length of N.x
01478     //    # N.s = starting position of N.x
01479     //    # N.w = weight for feature represented by N
01480     //    if( N.d >= k )
01481     //      margContrib = w / 4^(N.d-k)
01482     //      for i = 1 to (N.d-k+1)
01483     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01484     //        C_k[ N.s+i-1, y ] += margContrib
01485     //      end;
01486     //      if( N.d > k )
01487     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01488     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01489     //      end;
01490     //    end;
01491 }
01492 
01493     template <class Trie>
01494 void CTrie<Trie>::add_to_trie(
01495     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01496     float64_t *weights, bool degree_times_position_weights)
01497 {
01498     int32_t tree = trees[i] ;
01499     //ASSERT(seq_offset==0) ;
01500 
01501     int32_t max_depth = 0 ;
01502     float64_t* weights_column ;
01503     if (degree_times_position_weights)
01504         weights_column = &weights[(i+seq_offset)*degree] ;
01505     else
01506         weights_column = weights ;
01507 
01508     if (weights_in_tree)
01509     {
01510         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01511             if (CMath::abs(weights_column[j]*alpha)>0)
01512                 max_depth = j+1 ;
01513     }
01514     else
01515         // don't use the weights
01516         max_depth=degree ;
01517 
01518     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01519     {
01520         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01521         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01522         {
01523             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01524             {
01525                 // special treatment of the next nodes
01526                 TRIE_ASSERT(j >= degree-16) ;
01527                 // get the right element
01528                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01529                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01530                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01531 
01532                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01533                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01534                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01535 
01536                 // check whether the same string is stored
01537                 int32_t mismatch_pos = -1 ;
01538                 {
01539                     int32_t k ;
01540                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01541                     {
01542                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01543                         // ###
01544                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01545                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01546                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01547                         TRIE_ASSERT(k<16) ;
01548                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01549                         {
01550                             mismatch_pos=k ;
01551                             break ;
01552                         }
01553                     }
01554                 }
01555 
01556                 // what happens when the .seq sequence is longer than vec? should we branch???
01557 
01558                 if (mismatch_pos==-1)
01559                     // if so, then just increase the weight by alpha and stop
01560                     TreeMem[node].weight+=alpha ;
01561                 else
01562                     // otherwise
01563                     // 1. replace current node with new node
01564                     // 2. create new nodes until mismatching positon
01565                     // 2. add a branch with old string (old node) and the new string (new node)
01566                 {
01567                     // replace old node
01568                     int32_t last_node=tree ;
01569 
01570                     // create new nodes until mismatch
01571                     int32_t k ;
01572                     for (k=0; k<mismatch_pos; k++)
01573                     {
01574                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01575                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01576 
01577                         int32_t tmp=get_node();
01578                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01579                         last_node=tmp ;
01580                         if (weights_in_tree)
01581                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01582                         else
01583                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01584                         TRIE_ASSERT(j+k!=degree-1) ;
01585                     }
01586                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01587                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01588                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01589                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01590 
01591                     if (j+k==degree-1)
01592                     {
01593                         // init child weights with zero if after dropping out
01594                         // of the k<mismatch_pos loop we are one level below degree
01595                         // (keep this even after get_node() change!)
01596                         for (int32_t q=0; q<4; q++)
01597                             TreeMem[last_node].child_weights[q]=0.0 ;
01598                         if (weights_in_tree)
01599                         {
01600                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01601                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01602                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01603                         }
01604                         else
01605                         {
01606                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01607                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01608                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01609                         }
01610 
01611 #ifdef TRIE_CHECK_EVERYTHING
01612                         TreeMem[last_node].has_floats=true ;
01613 #endif
01614                     }
01615                     else
01616                     {
01617                         // the branch for the existing string
01618                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01619                         {
01620                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01621 
01622                             // move string by mismatch_pos positions
01623                             for (int32_t q=0; q<16; q++)
01624                             {
01625                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01626                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01627                                 else
01628                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01629                             }
01630 #ifdef TRIE_CHECK_EVERYTHING
01631                             TreeMem[node].has_seq=true ;
01632 #endif
01633                         }
01634 
01635                         // the new branch
01636                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01637                         int32_t tmp = get_node() ;
01638                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01639                         last_node=tmp ;
01640 
01641                         TreeMem[last_node].weight = alpha ;
01642 #ifdef TRIE_CHECK_EVERYTHING
01643                         TreeMem[last_node].has_seq = true ;
01644 #endif
01645                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01646                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01647                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01648                     }
01649                 }
01650                 break ;
01651             } 
01652             else
01653             {
01654                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01655                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01656                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01657                 if (weights_in_tree)
01658                     TreeMem[tree].weight += alpha*weights_column[j];
01659                 else
01660                     TreeMem[tree].weight += alpha ;
01661             }
01662         }
01663         else if (j==degree-1)
01664         {
01665             // special treatment of the last node
01666             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01667             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01668             if (weights_in_tree)
01669                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01670             else
01671                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01672 
01673             break;
01674         }
01675         else
01676         {
01677             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01678             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01679             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01680 
01681             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01682             if (use_seq)
01683                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01684             else
01685                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01686             tree=tmp ;
01687 
01688             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01689 #ifdef TRIE_CHECK_EVERYTHING
01690             TreeMem[tree].has_seq = use_seq ;
01691 #endif
01692             if (use_seq)
01693             {
01694                 TreeMem[tree].weight = alpha ;
01695                 // important to have the terminal characters (see ###)
01696                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01697                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01698                 {
01699                     TRIE_ASSERT(q<16) ;
01700                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01701                 }
01702                 break ;
01703             }
01704             else
01705             {
01706                 if (weights_in_tree)
01707                     TreeMem[tree].weight = alpha*weights_column[j] ;
01708                 else
01709                     TreeMem[tree].weight = alpha ;
01710 #ifdef TRIE_CHECK_EVERYTHING
01711                 if (j==degree-2)
01712                     TreeMem[tree].has_floats = true ;
01713 #endif
01714             }
01715         }
01716     }
01717 }
01718 
01719     template <class Trie>
01720 float64_t CTrie<Trie>::compute_by_tree_helper(
01721     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01722         int32_t weight_pos, float64_t* weights, 
01723         bool degree_times_position_weights)
01724 {
01725     int32_t tree = trees[tree_pos] ;
01726 
01727     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01728         return 0.0;
01729 
01730     float64_t *weights_column=NULL ;
01731     if (degree_times_position_weights)
01732         weights_column=&weights[weight_pos*degree] ;
01733     else // weights is a vector (1 x degree)
01734         weights_column=weights ;
01735 
01736     float64_t sum=0 ;
01737     for (int32_t j=0; seq_pos+j < len; j++)
01738     {
01739         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01740 
01741         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01742         {
01743             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01744             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01745             {
01746                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01747                 TRIE_ASSERT(tree>=0) ;
01748                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01749                 float64_t this_weight=0.0 ;
01750                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01751                 {
01752                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01753                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01754                         break ;
01755                     this_weight += weights_column[j+k] ;
01756                 }
01757                 sum += TreeMem[tree].weight * this_weight ;
01758                 break ;
01759             }
01760             else
01761             {
01762                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01763                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01764                 if (weights_in_tree)
01765                     sum += TreeMem[tree].weight ;
01766                 else
01767                     sum += TreeMem[tree].weight * weights_column[j] ;
01768             } ;
01769         }
01770         else
01771         {
01772             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01773             if (j==degree-1)
01774             {
01775                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01776                 if (weights_in_tree)
01777                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01778                 else
01779                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01780             }
01781             else
01782                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01783 
01784             break;
01785         }
01786     } 
01787 
01788     if (position_weights!=NULL)
01789         return sum*position_weights[weight_pos] ;
01790     else
01791         return sum ;
01792 }
01793 
01794     template <class Trie>
01795 void CTrie<Trie>::compute_by_tree_helper(
01796     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01797     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01798     int32_t mkl_stepsize, float64_t * weights,
01799     bool degree_times_position_weights)
01800 {
01801     int32_t tree = trees[tree_pos] ;
01802     if (factor==0)
01803         return ;
01804 
01805     if (position_weights!=NULL)
01806     {
01807         factor *= position_weights[weight_pos] ;
01808         if (factor==0)
01809             return ;
01810         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01811         {
01812             for (int32_t j=0; seq_pos+j<len; j++)
01813             {
01814                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01815                 {
01816                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01817                     {
01818                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01819                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01820                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01821                         {
01822                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01823                                 break ;
01824                             if (weights_in_tree)
01825                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01826                             else
01827                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01828                         }
01829                         break ;
01830                     }
01831                     else
01832                     {
01833                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01834                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01835                         if (weights_in_tree)
01836                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01837                         else
01838                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01839                     }
01840                 } 
01841                 else
01842                 {
01843                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01844                     if (j==degree-1)
01845                     {
01846                         if (weights_in_tree)
01847                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01848                         else
01849                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01850                     }
01851                 }
01852             }
01853         } 
01854         else // with position_weigths, weights is a matrix (len x degree)
01855         {
01856             for (int32_t j=0; seq_pos+j<len; j++)
01857             {
01858                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01859                 {
01860                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01861                     {
01862                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01863                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01864                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01865                         {
01866                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01867                                 break ;
01868                             if (weights_in_tree)
01869                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01870                             else
01871                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01872                         }
01873                         break ;
01874                     }
01875                     else
01876                     {
01877                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01878                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01879                         if (weights_in_tree)
01880                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01881                         else
01882                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01883                     }
01884                 } 
01885                 else
01886                 {
01887                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01888                     if (j==degree-1)
01889                     {
01890                         if (weights_in_tree)
01891                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01892                         else
01893                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01894                     }         
01895                     break ;
01896                 }
01897             }
01898         } 
01899     }
01900     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01901     {
01902         for (int32_t j=0; seq_pos+j<len; j++)
01903         {
01904             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01905             {
01906                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01907                 {
01908                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01909                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01910                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01911                     {
01912                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01913                             break ;
01914                         if (weights_in_tree)
01915                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01916                         else
01917                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01918                     }
01919                     break ;
01920                 }
01921                 else
01922                 {
01923                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01924                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01925                     if (weights_in_tree)
01926                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01927                     else
01928                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01929                 }
01930             }
01931             else
01932             {
01933                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01934                 if (j==degree-1)
01935                 {
01936                     if (weights_in_tree)
01937                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01938                     else
01939                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01940                 }
01941                 break ;
01942             }
01943         } 
01944     } 
01945     else // no position_weigths, weights is a matrix (len x degree)
01946     {
01947         /*if (!position_mask)
01948           {     
01949           position_mask = SG_MALLOC(bool, len);
01950           for (int32_t i=0; i<len; i++)
01951           {
01952           position_mask[i]=false ;
01953 
01954           for (int32_t j=0; j<degree; j++)
01955           if (weights[i*degree+j]!=0.0)
01956           {
01957           position_mask[i]=true ;
01958           break ;
01959           }
01960           }
01961           }
01962           if (position_mask[weight_pos]==0)
01963           return ;*/
01964 
01965         for (int32_t j=0; seq_pos+j<len; j++)
01966         {
01967             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01968             {
01969                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01970                 {
01971                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01972                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01973                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01974                     {
01975                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01976                             break ;
01977                         if (weights_in_tree)
01978                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01979                         else
01980                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01981                     }
01982                     break ;
01983                 }
01984                 else
01985                 {
01986                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01987                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01988                     if (weights_in_tree)
01989                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01990                     else
01991                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01992                 } 
01993             }
01994             else
01995             {
01996                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01997                 if (j==degree-1)
01998                 {
01999                     if (weights_in_tree)
02000                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
02001                     else
02002                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
02003                 }
02004                 break ;
02005             }
02006         } 
02007     }
02008 }
02009 
02010     template <class Trie>
02011 void CTrie<Trie>::fill_backtracking_table_recursion(
02012     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
02013     DynArray<ConsensusEntry>* table, float64_t* weights)
02014 {
02015     float64_t w=1.0;
02016 
02017     if (weights_in_tree || depth==0)
02018         value+=tree->weight;
02019     else
02020     {
02021         w=weights[degree-1];
02022         value+=weights[depth-1]*tree->weight;
02023     }
02024 
02025     if (degree-1==depth)
02026     {
02027         for (int32_t sym=0; sym<4; sym++)
02028         {
02029             float64_t v=w*tree->child_weights[sym];
02030             if (v!=0.0)
02031             {
02032                 ConsensusEntry entry;
02033                 entry.bt=-1;
02034                 entry.score=value+v;
02035                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02036 
02037                 table->append_element(entry);
02038             }
02039         }
02040     }
02041     else
02042     {
02043         for (int32_t sym=0; sym<4; sym++)
02044         {
02045             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02046             if (tree->children[sym] != NO_CHILD)
02047                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02048         }
02049     }
02050 }
02051 
02052     template <class Trie>
02053 float64_t CTrie<Trie>::get_cumulative_score(
02054     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02055 {
02056     float64_t result=0.0;
02057 
02058     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq);
02059 
02060     for (int32_t i=pos; i<pos+deg && i<length; i++)
02061     {
02062         //SG_PRINT("loop %d\n", i);
02063         Trie* tree = &TreeMem[trees[i]];
02064 
02065         for (int32_t d=0; d<deg-i+pos; d++)
02066         {
02067             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)));
02068             ASSERT(d-1<degree);
02069             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02070 
02071             float64_t w=1.0;
02072             if (!weights_in_tree)
02073                 w=weights[d];
02074 
02075             ASSERT(tree->children[sym] != NO_CHILD);
02076             tree=&TreeMem[tree->children[sym]];
02077             result+=w*tree->weight;
02078         }
02079     }
02080     //SG_PRINT("cum: %f\n", result);
02081     return result;
02082 }
02083 
02084     template <class Trie>
02085 void CTrie<Trie>::fill_backtracking_table(
02086     int32_t pos, DynArray<ConsensusEntry>* prev,
02087     DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02088 {
02089     ASSERT(pos>=0 && pos<length);
02090     ASSERT(!use_compact_terminal_nodes);
02091 
02092     Trie* t = &TreeMem[trees[pos]];
02093 
02094     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02095 
02096 
02097     if (cumulative)
02098     {
02099         int32_t num_cur=cur->get_num_elements();
02100         for (int32_t i=0; i<num_cur; i++)
02101         {
02102             ConsensusEntry entry=cur->get_element(i);
02103             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02104             cur->set_element(entry,i);
02105             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt);
02106         }
02107     }
02108 
02109     //if previous tree exists find maximum scoring path
02110     //for each element in cur and update bt table
02111     if (prev)
02112     {
02113         int32_t num_cur=cur->get_num_elements();
02114         int32_t num_prev=prev->get_num_elements();
02115 
02116         for (int32_t i=0; i<num_cur; i++)
02117         {
02118             //uint64_t str_cur_old= cur->get_element(i).string;
02119             uint64_t str_cur= cur->get_element(i).string >> 2;
02120             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur);
02121 
02122             int32_t bt=-1;
02123             float64_t max_score=0.0;
02124 
02125             for (int32_t j=0; j<num_prev; j++)
02126             {
02127                 //uint64_t str_prev_old= prev->get_element(j).string;
02128                 uint64_t mask=
02129                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02130                 uint64_t str_prev=  mask & prev->get_element(j).string;
02131                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask);
02132 
02133                 if (str_cur == str_prev)
02134                 {
02135                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02136                     if (bt==-1 || sc>max_score)
02137                     {
02138                         bt=j;
02139                         max_score=sc;
02140 
02141                         //SG_PRINT("new_max[%i,%i] = %f\n", j,i, max_score);
02142                     }
02143                 }
02144             }
02145 
02146             ASSERT(bt!=-1);
02147             ConsensusEntry entry;
02148             entry.bt=bt;
02149             entry.score=max_score;
02150             entry.string=cur->get_element(i).string;
02151             cur->set_element(entry, i);
02152             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt);
02153         }
02154     }
02155 }
02156 }
02157 #endif // _TRIE_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation