Trie.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2009 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014 
00015 #include <string.h>
00016 #include "lib/common.h"
00017 #include "lib/io.h"
00018 #include "base/DynArray.h"
00019 #include "lib/Mathematics.h"
00020 #include "base/SGObject.h"
00021 
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025 
00026 // sentinel is 0xFFFFFFFC or float -2
00027 #define NO_CHILD ((int32_t)-1073741824)
00028 
00029 #define WEIGHTS_IN_TRIE 
00030 //#define TRIE_CHECK_EVERYTHING
00031 
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x) 
00036 #endif
00037 
00038 //#define TRIE_ASSERT(x) ASSERT(x)
00039 #define TRIE_ASSERT(x) 
00040 
00041 #define TRIE_TERMINAL_CHARACTER  7
00042 
00044 struct ConsensusEntry
00045 {
00047     uint64_t string;
00049     float32_t score;
00051     int32_t bt;
00052 };
00053 
00055 struct POIMTrie
00056 {
00058     float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060 
00061     bool has_seq;
00063     bool has_floats;
00064 #endif      
00065     union
00066     {
00068         float32_t child_weights[4];
00070         int32_t children[4];
00072         uint8_t seq[16] ;
00073     }; 
00074 
00076     float64_t S;
00078     float64_t L;
00080     float64_t R;
00081 };
00082 
00084 struct DNATrie
00085 {
00087     float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089 
00090     bool has_seq;
00092     bool has_floats;
00093 #endif
00094     union
00095     {
00097         float32_t child_weights[4];
00099         int32_t children[4];
00101         uint8_t seq[16] ;
00102     }; 
00103 };
00104 
00106 struct TreeParseInfo {
00108     int32_t num_sym;
00110     int32_t num_feat;
00112     int32_t p;
00114     int32_t k;
00116     int32_t* nofsKmers;
00118     float64_t* margFactors;
00120     int32_t* x;
00122     int32_t* substrs;
00124     int32_t y0;
00126     float64_t* C_k;
00128     float64_t* L_k;
00130     float64_t* R_k;
00131 };
00132 
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134 
00135 template <class Trie> class CTrie;
00136 
00155 #define IGNORE_IN_CLASSLIST
00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject
00157 {
00158     public:
00160         CTrie();
00161 
00168         CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00169 
00171         CTrie(const CTrie & to_copy);
00172         virtual ~CTrie();
00173 
00175         const CTrie & operator=(const CTrie & to_copy);
00176 
00184         bool compare_traverse(
00185             int32_t node, const CTrie & other, int32_t other_node);
00186 
00192         bool compare(const CTrie & other);
00193 
00200         bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00201 
00208         int32_t find_deepest_node(
00209             int32_t start_node, int32_t &deepest_node) const;
00210 
00215         void display_node(int32_t node) const;
00216 
00218         void destroy();
00219 
00224         void set_degree(int32_t d);
00225 
00232         void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00233 
00239         void delete_trees(bool p_use_compact_terminal_nodes=true);
00240 
00251         void add_to_trie(
00252             int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00253             float64_t *weights, bool degree_times_position_weights);
00254 
00261         float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00262 
00268         float64_t* compute_abs_weights(int32_t &len);
00269 
00282         float64_t compute_by_tree_helper(
00283             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00284             int32_t weight_pos, float64_t * weights,
00285             bool degree_times_position_weights) ;
00286 
00301         void compute_by_tree_helper(
00302             int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00303             int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00304             int32_t mkl_stepsize, float64_t * weights,
00305             bool degree_times_position_weights);
00306 
00321         void compute_scoring_helper(
00322             int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00323             int32_t max_degree, int32_t num_feat, int32_t num_sym,
00324             int32_t sym_offset, int32_t offs, float64_t* result);
00325 
00338         void add_example_to_tree_mismatch_recursion(
00339             int32_t tree,  int32_t i, float64_t alpha, int32_t *vec,
00340             int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00341             int32_t max_mismatch, float64_t * weights);
00342 
00352         void traverse(
00353             int32_t tree, const int32_t p, struct TreeParseInfo info,
00354             const int32_t depth, int32_t* const x, const int32_t k);
00355 
00365         void count(
00366             const float64_t w, const int32_t depth,
00367             const struct TreeParseInfo info, const int32_t p, int32_t* x,
00368             const int32_t k);
00369 
00376         int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00377 
00386         float64_t get_cumulative_score(
00387             int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00388 
00398         void fill_backtracking_table_recursion(
00399             Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00400             DynArray<ConsensusEntry>* table, float64_t* weights);
00401 
00410         void fill_backtracking_table(
00411             int32_t pos, DynArray<ConsensusEntry>* prev,
00412             DynArray<ConsensusEntry>* cur, bool cumulative,
00413             float64_t* weights);
00414 
00420         void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00421 
00426         void POIMs_precalc_SLR(const float64_t* const distrib);
00427 
00437         void POIMs_get_SLR(
00438             const int32_t parentIdx, const int32_t sym, const int32_t depth,
00439             float64_t* S, float64_t* L, float64_t* R);
00440 
00447         void POIMs_add_SLR(
00448             float64_t* const* const poims, const int32_t K,
00449             const int32_t debug);
00450 
00455         inline bool get_use_compact_terminal_nodes()
00456         {
00457             return use_compact_terminal_nodes ;
00458         }
00459 
00465         inline void set_use_compact_terminal_nodes(
00466             bool p_use_compact_terminal_nodes)
00467         {
00468             use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00469         }
00470 
00475         inline int32_t get_num_used_nodes()
00476         {
00477             return TreeMemPtr;
00478         }
00479 
00484         inline void set_position_weights(const float64_t * p_position_weights)
00485         {
00486             position_weights=p_position_weights;
00487         }
00488 
00493         inline int32_t get_node(bool last_node=false)
00494         {
00495             int32_t ret = TreeMemPtr++;
00496             check_treemem() ;
00497 
00498             if (last_node)
00499             {
00500                 for (int32_t q=0; q<4; q++)
00501                     TreeMem[ret].child_weights[q]=0.0;
00502             }
00503             else
00504             {
00505                 for (int32_t q=0; q<4; q++)
00506                     TreeMem[ret].children[q]=NO_CHILD;
00507             }
00508 #ifdef TRIE_CHECK_EVERYTHING
00509             TreeMem[ret].has_seq=false ;
00510             TreeMem[ret].has_floats=false ;
00511 #endif
00512             TreeMem[ret].weight=0.0;
00513             return ret ;
00514         }
00515 
00517         inline void check_treemem()
00518         {
00519             if (TreeMemPtr+10 < TreeMemPtrMax)
00520                 return;
00521             SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00522                     TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00523             TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00524             TreeMem = (Trie*) realloc(TreeMem,
00525                     TreeMemPtrMax*sizeof(Trie));
00526             if (!TreeMem)
00527                 SG_ERROR( "out of memory\n");
00528         }
00529 
00534         inline void set_weights_in_tree(bool weights_in_tree_)
00535         {
00536             weights_in_tree = weights_in_tree_;
00537         }
00538 
00543         inline bool get_weights_in_tree()
00544         {
00545             return weights_in_tree;
00546         }
00547 
00557         void POIMs_extract_W_helper(
00558             const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00559             const int32_t y0, float64_t* const* const W, const int32_t K);
00560 
00573         void POIMs_calc_SLR_helper1(
00574             const float64_t* const distrib, const int32_t i,
00575             const int32_t nodeIdx, int32_t left_tries_idx[4],
00576             const int32_t depth, int32_t const lastSym, float64_t* S,
00577             float64_t* L, float64_t* R);
00578 
00579 
00590         void POIMs_calc_SLR_helper2(
00591             const float64_t* const distrib, const int32_t i,
00592             const int32_t nodeIdx, int32_t left_tries_idx[4],
00593             const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00594 
00605         void POIMs_add_SLR_helper1(
00606             const int32_t nodeIdx, const int32_t depth,const int32_t i,
00607             const int32_t y0, float64_t* const* const poims, const int32_t K,
00608             const int32_t debug);
00609 
00623         void POIMs_add_SLR_helper2(
00624             float64_t* const* const poims, const int32_t K, const int32_t k,
00625             const int32_t i, const int32_t y, const float64_t valW,
00626             const float64_t valS, const float64_t valL, const float64_t valR,
00627             const int32_t debug);
00628 
00630         inline virtual const char* get_name() const { return "Trie"; }
00631 
00632     public:
00634         int32_t NUM_SYMS;
00635 
00636     protected:
00638         int32_t length;
00640         int32_t * trees;
00641 
00643         int32_t degree;
00645         float64_t const *  position_weights;
00646 
00648         Trie* TreeMem;
00650         int32_t TreeMemPtr;
00652         int32_t TreeMemPtrMax;
00654         bool use_compact_terminal_nodes;
00655 
00657         bool weights_in_tree;
00658 
00660         int32_t* nofsKmers;
00661 };
00662     template <class Trie>
00663     CTrie<Trie>::CTrie()
00664     : CSGObject(), degree(0), position_weights(NULL),
00665         use_compact_terminal_nodes(false),
00666         weights_in_tree(true)
00667     {
00668 
00669         TreeMemPtrMax=0;
00670         TreeMemPtr=0;
00671         TreeMem=NULL;
00672 
00673         length=0;
00674         trees=NULL;
00675 
00676         NUM_SYMS=4;
00677     }
00678 
00679     template <class Trie>
00680     CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00681     : CSGObject(), degree(d), position_weights(NULL),
00682         use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00683         weights_in_tree(true)
00684     {
00685         TreeMemPtrMax=1024*1024/sizeof(Trie);
00686         TreeMemPtr=0;
00687         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00688 
00689         length=0;
00690         trees=NULL;
00691 
00692         NUM_SYMS=4;
00693     }
00694 
00695     template <class Trie>
00696     CTrie<Trie>::CTrie(const CTrie & to_copy)
00697     : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00698         use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00699     {
00700         if (to_copy.position_weights!=NULL)
00701         {
00702             position_weights = to_copy.position_weights;
00703             /*new float64_t[to_copy.length];
00704               for (int32_t i=0; i<to_copy.length; i++)
00705               position_weights[i]=to_copy.position_weights[i]; */
00706         }
00707         else
00708             position_weights=NULL;
00709 
00710         TreeMemPtrMax=to_copy.TreeMemPtrMax;
00711         TreeMemPtr=to_copy.TreeMemPtr;
00712         TreeMem=(Trie*)malloc(TreeMemPtrMax*sizeof(Trie));
00713         memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00714 
00715         length=to_copy.length;
00716         trees=new int32_t[length];
00717         for (int32_t i=0; i<length; i++)
00718             trees[i]=to_copy.trees[i];
00719 
00720         NUM_SYMS=4;
00721     }
00722 
00723     template <class Trie>
00724 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00725 {
00726     degree=to_copy.degree ;
00727     use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00728 
00729     delete[] position_weights ;
00730     position_weights=NULL ;
00731     if (to_copy.position_weights!=NULL)
00732     {
00733         position_weights=to_copy.position_weights ;
00734         /*position_weights = new float64_t[to_copy.length] ;
00735           for (int32_t i=0; i<to_copy.length; i++)
00736           position_weights[i]=to_copy.position_weights[i] ;*/
00737     }
00738     else
00739         position_weights=NULL ;
00740 
00741     TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00742     TreeMemPtr=to_copy.TreeMemPtr ;
00743     free(TreeMem) ;
00744     TreeMem = (Trie*)malloc(TreeMemPtrMax*sizeof(Trie)) ;
00745     memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00746 
00747     length = to_copy.length ;
00748     if (trees)
00749         delete[] trees ;
00750     trees=new int32_t[length] ;     
00751     for (int32_t i=0; i<length; i++)
00752         trees[i]=to_copy.trees[i] ;
00753 
00754     return *this ;
00755 }
00756 
00757 template <class Trie>
00758 int32_t CTrie<Trie>::find_deepest_node(
00759     int32_t start_node, int32_t& deepest_node) const
00760 {
00761 #ifdef TRIE_CHECK_EVERYTHING
00762     int32_t ret=0 ;
00763     SG_DEBUG("start_node=%i\n", start_node) ;
00764 
00765     if (start_node==NO_CHILD) 
00766     {
00767         for (int32_t i=0; i<length; i++)
00768         {
00769             int32_t my_deepest_node ;
00770             int32_t depth=find_deepest_node(i, my_deepest_node) ;
00771             SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00772             if (depth>ret)
00773             {
00774                 deepest_node=my_deepest_node ;
00775                 ret=depth ;
00776             }
00777         }
00778         return ret ;
00779     }
00780 
00781     if (TreeMem[start_node].has_seq)
00782     {
00783         for (int32_t q=0; q<16; q++)
00784             if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00785                 ret++ ;
00786         deepest_node=start_node ;
00787         return ret ;
00788     }
00789     if (TreeMem[start_node].has_floats)
00790     {
00791         deepest_node=start_node ;
00792         return 1 ;
00793     }
00794 
00795     for (int32_t q=0; q<4; q++)
00796     {
00797         int32_t my_deepest_node ;
00798         if (TreeMem[start_node].children[q]==NO_CHILD)
00799             continue ;
00800         int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00801         if (depth>ret)
00802         {
00803             deepest_node=my_deepest_node ;
00804             ret=depth ;
00805         }
00806     }
00807     return ret ;
00808 #else
00809     SG_ERROR( "not implemented\n") ;
00810     return 0 ;
00811 #endif
00812 }
00813 
00814     template <class Trie>
00815 int32_t CTrie<Trie>::compact_nodes(
00816     int32_t start_node, int32_t depth, float64_t * weights) 
00817 {
00818     SG_ERROR( "code buggy\n") ;
00819 
00820     int32_t ret=0 ;
00821 
00822     if (start_node==NO_CHILD) 
00823     {
00824         for (int32_t i=0; i<length; i++)
00825             compact_nodes(i,1, weights) ;
00826         return 0 ;
00827     }
00828     if (start_node<0)
00829         return -1 ;
00830 
00831     if (depth==degree-1)
00832     {
00833         TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00834         int32_t num_used=0 ;
00835         for (int32_t q=0; q<4; q++)
00836             if (TreeMem[start_node].child_weights[q]!=0.0)
00837                 num_used++ ;
00838         if (num_used>1)
00839             return -1 ;
00840         return 1 ;
00841     }
00842     TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00843 
00844     int32_t num_used = 0 ;
00845     int32_t q_used=-1 ;
00846 
00847     for (int32_t q=0; q<4; q++)
00848     {
00849         if (TreeMem[start_node].children[q]==NO_CHILD)
00850             continue ;
00851         num_used++ ;
00852         q_used=q ;
00853     }
00854     if (num_used>1)
00855     {
00856         if (depth>=degree-2)
00857             return -1 ;
00858         for (int32_t q=0; q<4; q++)
00859         {
00860             if (TreeMem[start_node].children[q]==NO_CHILD)
00861                 continue ;
00862             int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00863             if (num<=2)
00864                 continue ;
00865             int32_t node=get_node() ;
00866 
00867             int32_t last_node=TreeMem[start_node].children[q] ;
00868             if (weights_in_tree)
00869             {
00870                 ASSERT(weights[depth]!=0.0) ;
00871                 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00872             }
00873             else
00874                 TreeMem[node].weight=TreeMem[last_node].weight ;
00875 
00876 #ifdef TRIE_CHECK_EVERYTHING
00877             TreeMem[node].has_seq=true ;
00878 #endif
00879             memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00880             for (int32_t n=0; n<num; n++)
00881             {
00882                 ASSERT(depth+n+1<=degree-1) ;
00883                 ASSERT(last_node!=NO_CHILD) ;
00884                 if (depth+n+1==degree-1)
00885                 {
00886                     TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00887                     int32_t  k ;
00888                     for (k=0; k<4; k++)
00889                         if (TreeMem[last_node].child_weights[k]!=0.0)
00890                             break ;
00891                     if (k==4)
00892                         break ;
00893                     TreeMem[node].seq[n]=k ;
00894                     break ;
00895                 }
00896                 else
00897                 {
00898                     TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00899                     int32_t k ;
00900                     for (k=0; k<4; k++)
00901                         if (TreeMem[last_node].children[k]!=NO_CHILD)
00902                             break ;
00903                     if (k==4)
00904                         break ;
00905                     TreeMem[node].seq[n]=k ;
00906                     last_node=TreeMem[last_node].children[k] ;
00907                 }
00908             }
00909             TreeMem[start_node].children[q]=-node ;
00910         }
00911         return -1 ;
00912     }
00913     if (num_used==0)
00914         return 0 ;
00915 
00916     ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00917     if (ret<0)
00918         return ret ;
00919     return ret+1 ;
00920 }
00921 
00922 
00923     template <class Trie>
00924 bool CTrie<Trie>::compare_traverse(
00925     int32_t node, const CTrie<Trie> & other, int32_t other_node)
00926 {
00927     SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00928     if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00929     {
00930         SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00931         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00932         display_node(node) ;
00933         SG_DEBUG( "============================================================\n") ;           
00934         other.display_node(other_node) ;
00935         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00936         return false ;
00937     }
00938 
00939 #ifdef TRIE_CHECK_EVERYTHING
00940     if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00941     {
00942         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00943         SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00944         display_node(node) ;
00945         SG_DEBUG( "============================================================\n") ;           
00946         other.display_node(other_node) ;
00947         SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00948         return false ;
00949     }
00950     if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00951     {
00952         SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00953         return false ;
00954     }
00955     if (other.TreeMem[other_node].has_floats)
00956     {
00957         for (int32_t q=0; q<4; q++)
00958             if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00959             {
00960                 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00961                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00962                 display_node(node) ;
00963                 SG_DEBUG( "============================================================\n") ;           
00964                 other.display_node(other_node) ;
00965                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00966                 return false ;
00967             }
00968     }
00969     if (other.TreeMem[other_node].has_seq)
00970     {
00971         for (int32_t q=0; q<16; q++)
00972             if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00973             {
00974                 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00975                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00976                 display_node(node) ;
00977                 SG_DEBUG( "============================================================\n") ;           
00978                 other.display_node(other_node) ;
00979                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;           
00980                 return false ;
00981             }
00982     }
00983     if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00984     {
00985         for (int32_t q=0; q<4; q++)
00986         {
00987             if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00988                 continue ;
00989             if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00990             {
00991                 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00992                 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;           
00993                 display_node(node) ;
00994                 SG_DEBUG( "============================================================\n") ;           
00995                 other.display_node(other_node) ;
00996                 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00997                 return false ;
00998             }
00999             if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
01000                 return false ;
01001         }
01002     }
01003 #else
01004     SG_ERROR( "not implemented\n") ;
01005 #endif
01006 
01007     return true ;
01008 }
01009 
01010     template <class Trie>
01011 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
01012 {
01013     bool ret=true ;
01014     for (int32_t i=0; i<length; i++)
01015         if (!compare_traverse(trees[i], other, other.trees[i]))
01016             return false ;
01017         else
01018             SG_DEBUG("two tries at %i identical\n", i) ;
01019 
01020     return ret ;
01021 }
01022 
01023 template <class Trie>
01024 bool CTrie<Trie>::find_node(
01025     int32_t node, int32_t * trace, int32_t& trace_len) const 
01026 {
01027 #ifdef TRIE_CHECK_EVERYTHING
01028     ASSERT(trace_len-1>=0) ;
01029     ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01030         if (TreeMem[trace[trace_len-1]].has_seq)
01031             return false ;
01032     if (TreeMem[trace[trace_len-1]].has_floats)
01033         return false ;
01034 
01035     for (int32_t q=0; q<4; q++)
01036     {
01037         if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01038             continue ;
01039         int32_t tl=trace_len+1 ;
01040         if (TreeMem[trace[trace_len-1]].children[q]>=0)
01041             trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01042         else
01043             trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01044 
01045         if (trace[trace_len]==node)
01046         {
01047             trace_len=tl ;
01048             return true ;
01049         }
01050         if (find_node(node, trace, tl))
01051         {
01052             trace_len=tl ;
01053             return true ;
01054         }
01055     }
01056     trace_len=0 ;
01057     return false ;
01058 #else
01059     SG_ERROR( "not implemented\n") ;
01060     return false ;
01061 #endif
01062 }
01063 
01064 template <class Trie>
01065 void CTrie<Trie>::display_node(int32_t node) const
01066 {
01067 #ifdef TRIE_CHECK_EVERYTHING
01068     int32_t * trace=new int32_t[2*degree] ;
01069     int32_t trace_len=-1 ;
01070     bool found = false ;
01071     int32_t tree=-1 ;
01072     for (tree=0; tree<length; tree++)
01073     {
01074         trace[0]=trees[tree] ;
01075         trace_len=1 ;
01076         found=find_node(node, trace, trace_len) ;
01077         if (found)
01078             break ;
01079     }
01080     ASSERT(found) ;
01081     SG_PRINT( "position %i  trace: ", tree) ;
01082 
01083     for (int32_t i=0; i<trace_len-1; i++)
01084     {
01085         int32_t branch=-1 ;
01086         for (int32_t q=0; q<4; q++)
01087             if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01088             {
01089                 branch=q;
01090                 break ;
01091             }
01092         ASSERT(branch!=-1) ;
01093         char acgt[5]="ACGT" ;
01094         SG_PRINT( "%c", acgt[branch]) ;
01095     }
01096     SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01097     if (TreeMem[node].has_floats)
01098     {
01099         for (int32_t q=0; q<4; q++)
01100             SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01101     }
01102     if (TreeMem[node].has_seq)
01103     {
01104         for (int32_t q=0; q<16; q++)
01105             SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01106     }
01107     if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01108     {
01109         for (int32_t q=0; q<4; q++)
01110         {
01111             if (TreeMem[node].children[q]!=NO_CHILD)
01112             {
01113                 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01114                 display_node(abs(TreeMem[node].children[q])) ;
01115             }
01116             else
01117                 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01118         }
01119 
01120     }
01121 
01122     delete[] trace ;
01123 #else
01124     SG_ERROR( "not implemented\n") ;
01125 #endif
01126 }
01127 
01128 
01129 template <class Trie> CTrie<Trie>::~CTrie()
01130 {
01131     destroy() ;
01132 
01133     free(TreeMem) ;
01134 }
01135 
01136 template <class Trie> void CTrie<Trie>::destroy()
01137 {
01138     if (trees!=NULL)
01139     {
01140         delete_trees();
01141         for (int32_t i=0; i<length; i++)
01142             trees[i] = NO_CHILD;
01143         delete[] trees;
01144 
01145         TreeMemPtr=0;
01146         length=0;
01147         trees=NULL;
01148     }
01149 }
01150 
01151 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01152 {
01153     delete_trees(get_use_compact_terminal_nodes());
01154     degree=d;
01155 }
01156 
01157 template <class Trie> void CTrie<Trie>::create(
01158     int32_t len, bool p_use_compact_terminal_nodes)
01159 {
01160     destroy();
01161 
01162     trees=new int32_t[len] ;        
01163     TreeMemPtr=0 ;
01164     for (int32_t i=0; i<len; i++)
01165         trees[i]=get_node(degree==1);
01166     length = len ;
01167 
01168     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01169 }
01170 
01171 
01172 template <class Trie> void CTrie<Trie>::delete_trees(
01173     bool p_use_compact_terminal_nodes)
01174 {
01175     if (trees==NULL)
01176         return;
01177 
01178     TreeMemPtr=0 ;
01179     for (int32_t i=0; i<length; i++)
01180         trees[i]=get_node(degree==1);
01181 
01182     use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01183 } 
01184 
01185     template <class Trie>
01186 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01187 {
01188     float64_t ret=0 ;
01189 
01190     if (tree==NO_CHILD)
01191         return 0 ;
01192     TRIE_ASSERT(tree>=0) ;
01193 
01194     if (depth==degree-2)
01195     {
01196         ret+=(TreeMem[tree].weight) ;
01197 
01198         for (int32_t k=0; k<4; k++)
01199             ret+=(TreeMem[tree].child_weights[k]) ;
01200 
01201         return ret ;
01202     }
01203 
01204     ret+=(TreeMem[tree].weight) ;
01205 
01206     for (int32_t i=0; i<4; i++)
01207         if (TreeMem[tree].children[i]!=NO_CHILD)
01208             ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1)  ;
01209 
01210     return ret ;
01211 }
01212 
01213 
01214     template <class Trie>
01215 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01216 {
01217     float64_t * sum=new float64_t[length*4] ;
01218     for (int32_t i=0; i<length*4; i++)
01219         sum[i]=0 ;
01220     len=length ;
01221 
01222     for (int32_t i=0; i<length; i++)
01223     {
01224         TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01225         for (int32_t k=0; k<4; k++)
01226         {
01227             sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01228         }
01229     }
01230 
01231     return sum ;
01232 }
01233 
01234     template <class Trie>
01235 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01236     int32_t tree,  int32_t i, float64_t alpha,
01237         int32_t *vec, int32_t len_rem, 
01238         int32_t degree_rec, int32_t mismatch_rec, 
01239         int32_t max_mismatch, float64_t * weights) 
01240 {
01241     if (tree==NO_CHILD)
01242         tree=trees[i] ;
01243     TRIE_ASSERT(tree!=NO_CHILD) ;
01244 
01245     if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01246         return ;
01247     const int32_t other[4][3] = {   {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01248 
01249     int32_t subtree = NO_CHILD ;
01250 
01251     if (degree_rec==degree-1)
01252     {
01253         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01254         if (weights_in_tree)
01255             TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01256         else
01257             if (weights[degree_rec]!=0.0)
01258                 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01259         if (mismatch_rec+1<=max_mismatch)
01260             for (int32_t o=0; o<3; o++)
01261             {
01262                 if (weights_in_tree)
01263                     TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01264                 else
01265                     if (weights[degree_rec]!=0.0)
01266                         TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01267             }
01268         return ;
01269     }
01270     else
01271     {
01272         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01273         if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01274         {
01275             subtree=TreeMem[tree].children[vec[0]] ;
01276             if (weights_in_tree)
01277                 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01278             else
01279                 if (weights[degree_rec]!=0.0)
01280                     TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01281         }
01282         else 
01283         {
01284             int32_t tmp = get_node(degree_rec==degree-2);
01285             ASSERT(tmp>=0) ;
01286             TreeMem[tree].children[vec[0]]=tmp ;
01287             subtree=tmp ;
01288 #ifdef TRIE_CHECK_EVERYTHING
01289             if (degree_rec==degree-2)
01290                 TreeMem[subtree].has_floats=true ;
01291 #endif
01292             if (weights_in_tree)
01293                 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01294             else
01295                 if (weights[degree_rec]!=0.0)
01296                     TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01297                 else
01298                     TreeMem[subtree].weight = 0.0 ;
01299         }
01300         add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01301                 &vec[1], len_rem-1, 
01302                 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01303 
01304         if (mismatch_rec+1<=max_mismatch)
01305         {
01306             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01307             for (int32_t o=0; o<3; o++)
01308             {
01309                 int32_t ot = other[vec[0]][o] ;
01310                 if (TreeMem[tree].children[ot]!=NO_CHILD)
01311                 {
01312                     subtree=TreeMem[tree].children[ot] ;
01313                     if (weights_in_tree)
01314                         TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01315                     else
01316                         if (weights[degree_rec]!=0.0)
01317                             TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01318                 }
01319                 else 
01320                 {
01321                     int32_t tmp = get_node(degree_rec==degree-2);
01322                     ASSERT(tmp>=0) ;
01323                     TreeMem[tree].children[ot]=tmp ;
01324                     subtree=tmp ;
01325 #ifdef TRIE_CHECK_EVERYTHING
01326                     if (degree_rec==degree-2)
01327                         TreeMem[subtree].has_floats=true ;
01328 #endif
01329 
01330                     if (weights_in_tree)
01331                         TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01332                     else
01333                         if (weights[degree_rec]!=0.0)
01334                             TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01335                         else
01336                             TreeMem[subtree].weight = 0.0 ;
01337                 }
01338 
01339                 add_example_to_tree_mismatch_recursion(subtree,  i, alpha,
01340                         &vec[1], len_rem-1, 
01341                         degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01342             }
01343         }
01344     }
01345 }
01346 
01347     template <class Trie>
01348 void CTrie<Trie>::compute_scoring_helper(
01349     int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01350     int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01351     int32_t offs, float64_t* result)
01352 {
01353     if (i+j<num_feat)
01354     {
01355         float64_t decay=1.0; //no decay by default
01356         //if (j>d)
01357         //  decay=pow(0.5,j); //marginalize out lower order matches
01358 
01359         if (j<degree-1)
01360         {
01361             for (int32_t k=0; k<num_sym; k++)
01362             {
01363                 if (TreeMem[tree].children[k]!=NO_CHILD)
01364                 {
01365                     int32_t child=TreeMem[tree].children[k];
01366                     //continue recursion if not yet at max_degree, else add to result
01367                     if (d<max_degree-1)
01368                         compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01369                     else
01370                         result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01371 
01373                     if (d==0)
01374                         compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01375                 }
01376             }
01377         }
01378         else if (j==degree-1)
01379         {
01380             for (int32_t k=0; k<num_sym; k++)
01381             {
01382                 //continue recursion if not yet at max_degree, else add to result
01383                 if (d<max_degree-1 && i<num_feat-1)
01384                     compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01385                 else
01386                     result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01387             }
01388         }
01389     }
01390 }
01391 
01392     template <class Trie>
01393 void CTrie<Trie>::traverse(
01394     int32_t tree, const int32_t p, struct TreeParseInfo info,
01395     const int32_t depth, int32_t* const x, const int32_t k)
01396 {
01397     const int32_t num_sym = info.num_sym;
01398     const int32_t y0 = info.y0;
01399     const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01400     //const int32_t temp = info.substrs[depth]*num_sym - ( (depth<=k) ? 0 : info.nofsKmers[k] * x[depth-k-1] );
01401     //if( !( info.y0 == temp ) ) {
01402     //  printf( "\n temp=%d y0=%d k=%d depth=%d \n", temp, info.y0, k, depth );
01403     //}
01404     //ASSERT( info.y0 == temp );
01405     int32_t sym;
01406     ASSERT( depth < degree );
01407     //ASSERT( 0 <= info.substrs[depth] && info.substrs[depth] < info.nofsKmers[k] );
01408     if (depth<degree-1)
01409     {
01410         for( sym=0; sym<num_sym; ++sym ) {
01411             const int32_t childNum = TreeMem[tree].children[ sym ];
01412             if( childNum != NO_CHILD ) {
01413                 int32_t child = childNum ;
01414                 x[depth] = sym;
01415                 info.substrs[depth+1] = y0 + sym;
01416                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01417                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01418                 count( TreeMem[child].weight, depth, info, p, x, k );
01419                 traverse( child, p, info, depth+1, x, k );
01420                 x[depth] = -1;
01421             }
01422         }
01423     }
01424     else if( depth == degree-1 )
01425     {
01426         for( sym=0; sym<num_sym; ++sym ) {
01427             const float64_t w = TreeMem[tree].child_weights[ sym ];
01428             if( w != 0.0 ) {
01429                 x[depth] = sym;
01430                 info.substrs[depth+1] = y0 + sym;
01431                 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01432                 //ASSERT( info.y0 == ( info.substrs[depth+1]*num_sym - ( (depth<k) ? 0 : info.nofsKmers[k] * x[depth-k] ) ) );
01433                 count( w, depth, info, p, x, k );
01434                 x[depth] = -1;
01435             }
01436         }
01437     }
01438     //info.substrs[depth+1] = -1;
01439     //info.y0 = temp;
01440 }
01441 
01442     template <class Trie>
01443 void CTrie<Trie>::count(
01444     const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01445     const int32_t p, int32_t* x, const int32_t k)
01446 {
01447     ASSERT( fabs(w) < 1e10 );
01448     ASSERT( x[depth] >= 0 );
01449     ASSERT( x[depth+1] < 0 );
01450     if ( depth < k ) {
01451         return;
01452     }
01453     //ASSERT( info.margFactors[ depth-k ] == pow( 0.25, depth-k ) );
01454     const int32_t nofKmers = info.nofsKmers[k];
01455     const float64_t margWeight =  w * info.margFactors[ depth-k ];
01456     const int32_t m_a = depth - k + 1;
01457     const int32_t m_b = info.num_feat - p;
01458     const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01459     // all proper k-substrings
01460     const int32_t offset0 = nofKmers * p;
01461     register int32_t i;
01462     register int32_t offset;
01463     offset = offset0;
01464     for( i = 0; i < m; ++i ) {
01465         const int32_t y = info.substrs[i+k+1];
01466         info.C_k[ y + offset ] += margWeight;
01467         offset += nofKmers;
01468     }
01469     if( depth > k ) {
01470         // k-prefix
01471         const int32_t offsR = info.substrs[k+1] + offset0;
01472         info.R_k[offsR] += margWeight;
01473         // k-suffix
01474         if( p+depth-k < info.num_feat ) {
01475             const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01476             info.L_k[offsL] += margWeight; 
01477         }
01478     }
01479     //    # N.x = substring represented by N
01480     //    # N.d = length of N.x
01481     //    # N.s = starting position of N.x
01482     //    # N.w = weight for feature represented by N
01483     //    if( N.d >= k )
01484     //      margContrib = w / 4^(N.d-k)
01485     //      for i = 1 to (N.d-k+1)
01486     //        y = N.x[i:(i+k-1)]  # overlapped k-mer
01487     //        C_k[ N.s+i-1, y ] += margContrib
01488     //      end;
01489     //      if( N.d > k )
01490     //        L_k[ N.s+N.d-k, N.x[N.d-k+(1:k)] ] += margContrib  # j-suffix of N.x
01491     //        R_k[ N.s,       N.x[1:k]         ] += margContrib  # j-prefix of N.x
01492     //      end;
01493     //    end;
01494 }
01495 
01496     template <class Trie>
01497 void CTrie<Trie>::add_to_trie(
01498     int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01499     float64_t *weights, bool degree_times_position_weights)
01500 {
01501     int32_t tree = trees[i] ;
01502     //ASSERT(seq_offset==0) ;
01503 
01504     int32_t max_depth = 0 ;
01505     float64_t* weights_column ;
01506     if (degree_times_position_weights)
01507         weights_column = &weights[(i+seq_offset)*degree] ;
01508     else
01509         weights_column = weights ;
01510 
01511     if (weights_in_tree)
01512     {
01513         for (int32_t j=0; (j<degree) && (i+j<length); j++)
01514             if (CMath::abs(weights_column[j]*alpha)>0)
01515                 max_depth = j+1 ;
01516     }
01517     else
01518         // don't use the weights
01519         max_depth=degree ;
01520 
01521     for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01522     {
01523         TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01524         if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01525         {
01526             if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01527             {
01528                 // special treatment of the next nodes
01529                 TRIE_ASSERT(j >= degree-16) ;
01530                 // get the right element
01531                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01532                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01533                 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01534 
01535                 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01536                 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01537                 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01538 
01539                 // check whether the same string is stored
01540                 int32_t mismatch_pos = -1 ;
01541                 {
01542                     int32_t k ;
01543                     for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01544                     {
01545                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01546                         // ###
01547                         if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01548                             fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01549                         TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01550                         TRIE_ASSERT(k<16) ;
01551                         if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01552                         {
01553                             mismatch_pos=k ;
01554                             break ;
01555                         }
01556                     }
01557                 }
01558 
01559                 // what happens when the .seq sequence is longer than vec? should we branch???
01560 
01561                 if (mismatch_pos==-1)
01562                     // if so, then just increase the weight by alpha and stop
01563                     TreeMem[node].weight+=alpha ;
01564                 else
01565                     // otherwise
01566                     // 1. replace current node with new node
01567                     // 2. create new nodes until mismatching positon
01568                     // 2. add a branch with old string (old node) and the new string (new node)
01569                 {
01570                     // replace old node
01571                     int32_t last_node=tree ;
01572 
01573                     // create new nodes until mismatch
01574                     int32_t k ;
01575                     for (k=0; k<mismatch_pos; k++)
01576                     {
01577                         TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01578                         TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01579 
01580                         int32_t tmp=get_node();
01581                         TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01582                         last_node=tmp ;
01583                         if (weights_in_tree)
01584                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01585                         else
01586                             TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01587                         TRIE_ASSERT(j+k!=degree-1) ;
01588                     }
01589                     if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01590                         fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01591                     ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01592                     TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01593 
01594                     if (j+k==degree-1)
01595                     {
01596                         // init child weights with zero if after dropping out
01597                         // of the k<mismatch_pos loop we are one level below degree
01598                         // (keep this even after get_node() change!)
01599                         for (int32_t q=0; q<4; q++)
01600                             TreeMem[last_node].child_weights[q]=0.0 ;
01601                         if (weights_in_tree)
01602                         {
01603                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01604                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01605                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01606                         }
01607                         else
01608                         {
01609                             if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01610                                 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01611                             TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01612                         }
01613 
01614 #ifdef TRIE_CHECK_EVERYTHING
01615                         TreeMem[last_node].has_floats=true ;
01616 #endif
01617                     }
01618                     else
01619                     {
01620                         // the branch for the existing string
01621                         if (TreeMem[node].seq[mismatch_pos]<4) // i.e. !=TRIE_TERMINAL_CHARACTER
01622                         {
01623                             TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01624 
01625                             // move string by mismatch_pos positions
01626                             for (int32_t q=0; q<16; q++)
01627                             {
01628                                 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01629                                     TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01630                                 else
01631                                     TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01632                             }
01633 #ifdef TRIE_CHECK_EVERYTHING
01634                             TreeMem[node].has_seq=true ;
01635 #endif
01636                         }
01637 
01638                         // the new branch
01639                         TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01640                         int32_t tmp = get_node() ;
01641                         TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01642                         last_node=tmp ;
01643 
01644                         TreeMem[last_node].weight = alpha ;
01645 #ifdef TRIE_CHECK_EVERYTHING
01646                         TreeMem[last_node].has_seq = true ;
01647 #endif
01648                         memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01649                         for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01650                             TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01651                     }
01652                 }
01653                 break ;
01654             } 
01655             else
01656             {
01657                 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01658                 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01659                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01660                 if (weights_in_tree)
01661                     TreeMem[tree].weight += alpha*weights_column[j];
01662                 else
01663                     TreeMem[tree].weight += alpha ;
01664             }
01665         }
01666         else if (j==degree-1)
01667         {
01668             // special treatment of the last node
01669             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01670             TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01671             if (weights_in_tree)
01672                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01673             else
01674                 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01675 
01676             break;
01677         }
01678         else
01679         {
01680             bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01681             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01682             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01683 
01684             int32_t tmp = get_node((j==degree-2) && (!use_seq));
01685             if (use_seq)
01686                 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01687             else
01688                 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01689             tree=tmp ;
01690 
01691             TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01692 #ifdef TRIE_CHECK_EVERYTHING
01693             TreeMem[tree].has_seq = use_seq ;
01694 #endif
01695             if (use_seq)
01696             {
01697                 TreeMem[tree].weight = alpha ;
01698                 // important to have the terminal characters (see ###)
01699                 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01700                 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01701                 {
01702                     TRIE_ASSERT(q<16) ;
01703                     TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01704                 }
01705                 break ;
01706             }
01707             else
01708             {
01709                 if (weights_in_tree)
01710                     TreeMem[tree].weight = alpha*weights_column[j] ;
01711                 else
01712                     TreeMem[tree].weight = alpha ;
01713 #ifdef TRIE_CHECK_EVERYTHING
01714                 if (j==degree-2)
01715                     TreeMem[tree].has_floats = true ;
01716 #endif
01717             }
01718         }
01719     }
01720 }
01721 
01722     template <class Trie>
01723 float64_t CTrie<Trie>::compute_by_tree_helper(
01724     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01725         int32_t weight_pos, float64_t* weights, 
01726         bool degree_times_position_weights)
01727 {
01728     int32_t tree = trees[tree_pos] ;
01729 
01730     if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01731         return 0.0;
01732 
01733     float64_t *weights_column=NULL ;
01734     if (degree_times_position_weights)
01735         weights_column=&weights[weight_pos*degree] ;
01736     else // weights is a vector (1 x degree)
01737         weights_column=weights ;
01738 
01739     float64_t sum=0 ;
01740     for (int32_t j=0; seq_pos+j < len; j++)
01741     {
01742         TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01743 
01744         if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01745         {
01746             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01747             if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01748             {
01749                 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01750                 TRIE_ASSERT(tree>=0) ;
01751                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01752                 float64_t this_weight=0.0 ;
01753                 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01754                 {
01755                     TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01756                     if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01757                         break ;
01758                     this_weight += weights_column[j+k] ;
01759                 }
01760                 sum += TreeMem[tree].weight * this_weight ;
01761                 break ;
01762             }
01763             else
01764             {
01765                 tree=TreeMem[tree].children[vec[seq_pos+j]];
01766                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01767                 if (weights_in_tree)
01768                     sum += TreeMem[tree].weight ;
01769                 else
01770                     sum += TreeMem[tree].weight * weights_column[j] ;
01771             } ;
01772         }
01773         else
01774         {
01775             TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01776             if (j==degree-1)
01777             {
01778                 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01779                 if (weights_in_tree)
01780                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01781                 else
01782                     sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01783             }
01784             else
01785                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01786 
01787             break;
01788         }
01789     } 
01790 
01791     if (position_weights!=NULL)
01792         return sum*position_weights[weight_pos] ;
01793     else
01794         return sum ;
01795 }
01796 
01797     template <class Trie>
01798 void CTrie<Trie>::compute_by_tree_helper(
01799     int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01800     int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01801     int32_t mkl_stepsize, float64_t * weights,
01802     bool degree_times_position_weights)
01803 {
01804     int32_t tree = trees[tree_pos] ;
01805     if (factor==0)
01806         return ;
01807 
01808     if (position_weights!=NULL)
01809     {
01810         factor *= position_weights[weight_pos] ;
01811         if (factor==0)
01812             return ;
01813         if (!degree_times_position_weights) // with position_weigths, weights is a vector (1 x degree)
01814         {
01815             for (int32_t j=0; seq_pos+j<len; j++)
01816             {
01817                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01818                 {
01819                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01820                     {
01821                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01822                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01823                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01824                         {
01825                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01826                                 break ;
01827                             if (weights_in_tree)
01828                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01829                             else
01830                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01831                         }
01832                         break ;
01833                     }
01834                     else
01835                     {
01836                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01837                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01838                         if (weights_in_tree)
01839                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01840                         else
01841                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01842                     }
01843                 } 
01844                 else
01845                 {
01846                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01847                     if (j==degree-1)
01848                     {
01849                         if (weights_in_tree)
01850                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01851                         else
01852                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01853                     }
01854                 }
01855             }
01856         } 
01857         else // with position_weigths, weights is a matrix (len x degree)
01858         {
01859             for (int32_t j=0; seq_pos+j<len; j++)
01860             {
01861                 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01862                 {
01863                     if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01864                     {
01865                         tree = -TreeMem[tree].children[vec[seq_pos+j]];
01866                         TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01867                         for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01868                         {
01869                             if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01870                                 break ;
01871                             if (weights_in_tree)
01872                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01873                             else
01874                                 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01875                         }
01876                         break ;
01877                     }
01878                     else
01879                     {
01880                         tree=TreeMem[tree].children[vec[seq_pos+j]];
01881                         TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01882                         if (weights_in_tree)
01883                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01884                         else
01885                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01886                     }
01887                 } 
01888                 else
01889                 {
01890                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01891                     if (j==degree-1)
01892                     {
01893                         if (weights_in_tree)
01894                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01895                         else
01896                             LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01897                     }         
01898                     break ;
01899                 }
01900             }
01901         } 
01902     }
01903     else if (!degree_times_position_weights) // no position_weigths, weights is a vector (1 x degree)
01904     {
01905         for (int32_t j=0; seq_pos+j<len; j++)
01906         {
01907             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01908             {
01909                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01910                 {
01911                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01912                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01913                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01914                     {
01915                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01916                             break ;
01917                         if (weights_in_tree)
01918                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01919                         else
01920                             LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01921                     }
01922                     break ;
01923                 }
01924                 else
01925                 {
01926                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01927                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01928                     if (weights_in_tree)
01929                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01930                     else
01931                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01932                 }
01933             }
01934             else
01935             {
01936                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01937                 if (j==degree-1)
01938                 {
01939                     if (weights_in_tree)
01940                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01941                     else
01942                         LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01943                 }
01944                 break ;
01945             }
01946         } 
01947     } 
01948     else // no position_weigths, weights is a matrix (len x degree)
01949     {
01950         /*if (!position_mask)
01951           {     
01952           position_mask = new bool[len] ;
01953           for (int32_t i=0; i<len; i++)
01954           {
01955           position_mask[i]=false ;
01956 
01957           for (int32_t j=0; j<degree; j++)
01958           if (weights[i*degree+j]!=0.0)
01959           {
01960           position_mask[i]=true ;
01961           break ;
01962           }
01963           }
01964           }
01965           if (position_mask[weight_pos]==0)
01966           return ;*/
01967 
01968         for (int32_t j=0; seq_pos+j<len; j++)
01969         {
01970             if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01971             {
01972                 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01973                 {
01974                     tree = -TreeMem[tree].children[vec[seq_pos+j]];
01975                     TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01976                     for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01977                     {
01978                         if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01979                             break ;
01980                         if (weights_in_tree)
01981                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01982                         else
01983                             LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01984                     }
01985                     break ;
01986                 }
01987                 else
01988                 {
01989                     tree=TreeMem[tree].children[vec[seq_pos+j]];
01990                     TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01991                     if (weights_in_tree)
01992                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01993                     else
01994                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01995                 } 
01996             }
01997             else
01998             {
01999                 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
02000                 if (j==degree-1)
02001                 {
02002                     if (weights_in_tree)
02003                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
02004                     else
02005                         LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
02006                 }
02007                 break ;
02008             }
02009         } 
02010     }
02011 }
02012 
02013     template <class Trie>
02014 void CTrie<Trie>::fill_backtracking_table_recursion(
02015     Trie* tree, int32_t depth, uint64_t seq, float64_t value,
02016     DynArray<ConsensusEntry>* table, float64_t* weights)
02017 {
02018     float64_t w=1.0;
02019 
02020     if (weights_in_tree || depth==0)
02021         value+=tree->weight;
02022     else
02023     {
02024         w=weights[degree-1];
02025         value+=weights[depth-1]*tree->weight;
02026     }
02027 
02028     if (degree-1==depth)
02029     {
02030         for (int32_t sym=0; sym<4; sym++)
02031         {
02032             float64_t v=w*tree->child_weights[sym];
02033             if (v!=0.0)
02034             {
02035                 ConsensusEntry entry;
02036                 entry.bt=-1;
02037                 entry.score=value+v;
02038                 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02039 
02040                 table->append_element(entry);
02041             }
02042         }
02043     }
02044     else
02045     {
02046         for (int32_t sym=0; sym<4; sym++)
02047         {
02048             uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02049             if (tree->children[sym] != NO_CHILD)
02050                 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02051         }
02052     }
02053 }
02054 
02055     template <class Trie>
02056 float64_t CTrie<Trie>::get_cumulative_score(
02057     int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02058 {
02059     float64_t result=0.0;
02060 
02061     //SG_PRINT("pos:%i length:%i deg:%i seq:0x%0llx...\n", pos, length, deg, seq);
02062 
02063     for (int32_t i=pos; i<pos+deg && i<length; i++)
02064     {
02065         //SG_PRINT("loop %d\n", i);
02066         Trie* tree = &TreeMem[trees[i]];
02067 
02068         for (int32_t d=0; d<deg-i+pos; d++)
02069         {
02070             //SG_PRINT("loop degree %d shit: %d\n", d, (2*(deg-1-d-i+pos)));
02071             ASSERT(d-1<degree);
02072             int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02073 
02074             float64_t w=1.0;
02075             if (!weights_in_tree)
02076                 w=weights[d];
02077 
02078             ASSERT(tree->children[sym] != NO_CHILD);
02079             tree=&TreeMem[tree->children[sym]];
02080             result+=w*tree->weight;
02081         }
02082     }
02083     //SG_PRINT("cum: %f\n", result);
02084     return result;
02085 }
02086 
02087     template <class Trie>
02088 void CTrie<Trie>::fill_backtracking_table(
02089     int32_t pos, DynArray<ConsensusEntry>* prev,
02090     DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02091 {
02092     ASSERT(pos>=0 && pos<length);
02093     ASSERT(!use_compact_terminal_nodes);
02094 
02095     Trie* t = &TreeMem[trees[pos]];
02096 
02097     fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02098 
02099 
02100     if (cumulative)
02101     {
02102         int32_t num_cur=cur->get_num_elements();
02103         for (int32_t i=0; i<num_cur; i++)
02104         {
02105             ConsensusEntry entry=cur->get_element(i);
02106             entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02107             cur->set_element(entry,i);
02108             //SG_PRINT("cum: str:0%0llx sc:%f bt:%d\n",entry.string,entry.score,entry.bt);
02109         }
02110     }
02111 
02112     //if previous tree exists find maximum scoring path
02113     //for each element in cur and update bt table
02114     if (prev)
02115     {
02116         int32_t num_cur=cur->get_num_elements();
02117         int32_t num_prev=prev->get_num_elements();
02118 
02119         for (int32_t i=0; i<num_cur; i++)
02120         {
02121             //uint64_t str_cur_old= cur->get_element(i).string;
02122             uint64_t str_cur= cur->get_element(i).string >> 2;
02123             //SG_PRINT("...cur:0x%0llx cur_noprfx:0x%0llx...\n", str_cur_old, str_cur);
02124 
02125             int32_t bt=-1;
02126             float64_t max_score=0.0;
02127 
02128             for (int32_t j=0; j<num_prev; j++)
02129             {
02130                 //uint64_t str_prev_old= prev->get_element(j).string;
02131                 uint64_t mask=
02132                     ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02133                 uint64_t str_prev=  mask & prev->get_element(j).string;
02134                 //SG_PRINT("...prev:0x%0llx prev_nosfx:0x%0llx mask:%0llx...\n", str_prev_old, str_prev,mask);
02135 
02136                 if (str_cur == str_prev)
02137                 {
02138                     float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02139                     if (bt==-1 || sc>max_score)
02140                     {
02141                         bt=j;
02142                         max_score=sc;
02143 
02144                         //SG_PRINT("new max[%i,%i] = %f\n", j,i, max_score);
02145                     }
02146                 }
02147             }
02148 
02149             ASSERT(bt!=-1);
02150             ConsensusEntry entry;
02151             entry.bt=bt;
02152             entry.score=max_score;
02153             entry.string=cur->get_element(i).string;
02154             cur->set_element(entry, i);
02155             //SG_PRINT("entry[%d]: str:0%0llx sc:%f bt:%d\n",i, entry.string,entry.score,entry.bt);
02156         }
02157     }
02158 }
02159 }
02160 #endif // _TRIE_H___
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation