00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #ifndef _TRIE_H___
00013 #define _TRIE_H___
00014
00015 #include <string.h>
00016 #include <shogun/lib/common.h>
00017 #include <shogun/io/SGIO.h>
00018 #include <shogun/base/DynArray.h>
00019 #include <shogun/mathematics/Math.h>
00020 #include <shogun/base/SGObject.h>
00021
00022 namespace shogun
00023 {
00024 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00025
00026
00027 #define NO_CHILD ((int32_t)-1073741824)
00028
00029 #define WEIGHTS_IN_TRIE
00030
00031
00032 #ifdef TRIE_CHECK_EVERYTHING
00033 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
00034 #else
00035 #define TRIE_ASSERT_EVERYTHING(x)
00036 #endif
00037
00038
00039 #define TRIE_ASSERT(x)
00040
00041 #define TRIE_TERMINAL_CHARACTER 7
00042
00044 struct ConsensusEntry
00045 {
00047 uint64_t string;
00049 float32_t score;
00051 int32_t bt;
00052 };
00053
00055 struct POIMTrie
00056 {
00058 float64_t weight;
00059 #ifdef TRIE_CHECK_EVERYTHING
00060
00061 bool has_seq;
00063 bool has_floats;
00064 #endif
00065 union
00066 {
00068 float32_t child_weights[4];
00070 int32_t children[4];
00072 uint8_t seq[16] ;
00073 };
00074
00076 float64_t S;
00078 float64_t L;
00080 float64_t R;
00081 };
00082
00084 struct DNATrie
00085 {
00087 float64_t weight;
00088 #ifdef TRIE_CHECK_EVERYTHING
00089
00090 bool has_seq;
00092 bool has_floats;
00093 #endif
00094 union
00095 {
00097 float32_t child_weights[4];
00099 int32_t children[4];
00101 uint8_t seq[16] ;
00102 };
00103 };
00104
00106 struct TreeParseInfo {
00108 int32_t num_sym;
00110 int32_t num_feat;
00112 int32_t p;
00114 int32_t k;
00116 int32_t* nofsKmers;
00118 float64_t* margFactors;
00120 int32_t* x;
00122 int32_t* substrs;
00124 int32_t y0;
00126 float64_t* C_k;
00128 float64_t* L_k;
00130 float64_t* R_k;
00131 };
00132
00133 #endif // DOXYGEN_SHOULD_SKIP_THIS
00134
00135 template <class Trie> class CTrie;
00136
00137 #define IGNORE_IN_CLASSLIST
00138
00156 IGNORE_IN_CLASSLIST template <class Trie> class CTrie : public CSGObject
00157 {
00158 public:
00160 CTrie();
00161
00168 CTrie(int32_t d, bool p_use_compact_terminal_nodes=true);
00169
00171 CTrie(const CTrie & to_copy);
00172 virtual ~CTrie();
00173
00175 const CTrie & operator=(const CTrie & to_copy);
00176
00184 bool compare_traverse(
00185 int32_t node, const CTrie & other, int32_t other_node);
00186
00192 bool compare(const CTrie & other);
00193
00200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len) const;
00201
00208 int32_t find_deepest_node(
00209 int32_t start_node, int32_t &deepest_node) const;
00210
00215 void display_node(int32_t node) const;
00216
00218 void destroy();
00219
00224 void set_degree(int32_t d);
00225
00232 void create(int32_t len, bool p_use_compact_terminal_nodes=true);
00233
00239 void delete_trees(bool p_use_compact_terminal_nodes=true);
00240
00251 void add_to_trie(
00252 int32_t i, int32_t seq_offset, int32_t* vec, float32_t alpha,
00253 float64_t *weights, bool degree_times_position_weights);
00254
00261 float64_t compute_abs_weights_tree(int32_t tree, int32_t depth);
00262
00268 float64_t* compute_abs_weights(int32_t &len);
00269
00282 float64_t compute_by_tree_helper(
00283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00284 int32_t weight_pos, float64_t * weights,
00285 bool degree_times_position_weights) ;
00286
00301 void compute_by_tree_helper(
00302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
00303 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
00304 int32_t mkl_stepsize, float64_t * weights,
00305 bool degree_times_position_weights);
00306
00321 void compute_scoring_helper(
00322 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
00323 int32_t max_degree, int32_t num_feat, int32_t num_sym,
00324 int32_t sym_offset, int32_t offs, float64_t* result);
00325
00338 void add_example_to_tree_mismatch_recursion(
00339 int32_t tree, int32_t i, float64_t alpha, int32_t *vec,
00340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
00341 int32_t max_mismatch, float64_t * weights);
00342
00352 void traverse(
00353 int32_t tree, const int32_t p, struct TreeParseInfo info,
00354 const int32_t depth, int32_t* const x, const int32_t k);
00355
00365 void count(
00366 const float64_t w, const int32_t depth,
00367 const struct TreeParseInfo info, const int32_t p, int32_t* x,
00368 const int32_t k);
00369
00376 int32_t compact_nodes(int32_t start_node, int32_t depth, float64_t * weights);
00377
00386 float64_t get_cumulative_score(
00387 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights);
00388
00398 void fill_backtracking_table_recursion(
00399 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
00400 DynArray<ConsensusEntry>* table, float64_t* weights);
00401
00410 void fill_backtracking_table(
00411 int32_t pos, DynArray<ConsensusEntry>* prev,
00412 DynArray<ConsensusEntry>* cur, bool cumulative,
00413 float64_t* weights);
00414
00420 void POIMs_extract_W(float64_t* const* const W, const int32_t K);
00421
00426 void POIMs_precalc_SLR(const float64_t* const distrib);
00427
00437 void POIMs_get_SLR(
00438 const int32_t parentIdx, const int32_t sym, const int32_t depth,
00439 float64_t* S, float64_t* L, float64_t* R);
00440
00447 void POIMs_add_SLR(
00448 float64_t* const* const poims, const int32_t K,
00449 const int32_t debug);
00450
00455 inline bool get_use_compact_terminal_nodes()
00456 {
00457 return use_compact_terminal_nodes ;
00458 }
00459
00465 inline void set_use_compact_terminal_nodes(
00466 bool p_use_compact_terminal_nodes)
00467 {
00468 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
00469 }
00470
00475 inline int32_t get_num_used_nodes()
00476 {
00477 return TreeMemPtr;
00478 }
00479
00484 inline void set_position_weights(float64_t* p_position_weights)
00485 {
00486 position_weights=p_position_weights;
00487 }
00488
00493 inline int32_t get_node(bool last_node=false)
00494 {
00495 int32_t ret = TreeMemPtr++;
00496 check_treemem() ;
00497
00498 if (last_node)
00499 {
00500 for (int32_t q=0; q<4; q++)
00501 TreeMem[ret].child_weights[q]=0.0;
00502 }
00503 else
00504 {
00505 for (int32_t q=0; q<4; q++)
00506 TreeMem[ret].children[q]=NO_CHILD;
00507 }
00508 #ifdef TRIE_CHECK_EVERYTHING
00509 TreeMem[ret].has_seq=false ;
00510 TreeMem[ret].has_floats=false ;
00511 #endif
00512 TreeMem[ret].weight=0.0;
00513 return ret ;
00514 }
00515
00517 inline void check_treemem()
00518 {
00519 if (TreeMemPtr+10 < TreeMemPtrMax)
00520 return;
00521 SG_DEBUG( "Extending TreeMem from %i to %i elements\n",
00522 TreeMemPtrMax, (int32_t) ((float64_t)TreeMemPtrMax*1.2));
00523 TreeMemPtrMax = (int32_t) ((float64_t)TreeMemPtrMax*1.2);
00524 TreeMem = SG_REALLOC(Trie, TreeMem, TreeMemPtrMax);
00525 }
00526
00531 inline void set_weights_in_tree(bool weights_in_tree_)
00532 {
00533 weights_in_tree = weights_in_tree_;
00534 }
00535
00540 inline bool get_weights_in_tree()
00541 {
00542 return weights_in_tree;
00543 }
00544
00554 void POIMs_extract_W_helper(
00555 const int32_t nodeIdx, const int32_t depth, const int32_t offset,
00556 const int32_t y0, float64_t* const* const W, const int32_t K);
00557
00570 void POIMs_calc_SLR_helper1(
00571 const float64_t* const distrib, const int32_t i,
00572 const int32_t nodeIdx, int32_t left_tries_idx[4],
00573 const int32_t depth, int32_t const lastSym, float64_t* S,
00574 float64_t* L, float64_t* R);
00575
00576
00587 void POIMs_calc_SLR_helper2(
00588 const float64_t* const distrib, const int32_t i,
00589 const int32_t nodeIdx, int32_t left_tries_idx[4],
00590 const int32_t depth, float64_t* S, float64_t* L, float64_t* R);
00591
00602 void POIMs_add_SLR_helper1(
00603 const int32_t nodeIdx, const int32_t depth,const int32_t i,
00604 const int32_t y0, float64_t* const* const poims, const int32_t K,
00605 const int32_t debug);
00606
00620 void POIMs_add_SLR_helper2(
00621 float64_t* const* const poims, const int32_t K, const int32_t k,
00622 const int32_t i, const int32_t y, const float64_t valW,
00623 const float64_t valS, const float64_t valL, const float64_t valR,
00624 const int32_t debug);
00625
00627 inline virtual const char* get_name() const { return "Trie"; }
00628
00629 public:
00631 int32_t NUM_SYMS;
00632
00633 protected:
00635 int32_t length;
00637 int32_t * trees;
00638
00640 int32_t degree;
00642 float64_t* position_weights;
00643
00645 Trie* TreeMem;
00647 int32_t TreeMemPtr;
00649 int32_t TreeMemPtrMax;
00651 bool use_compact_terminal_nodes;
00652
00654 bool weights_in_tree;
00655
00657 int32_t* nofsKmers;
00658 };
00659 template <class Trie>
00660 CTrie<Trie>::CTrie()
00661 : CSGObject(), degree(0), position_weights(NULL),
00662 use_compact_terminal_nodes(false),
00663 weights_in_tree(true)
00664 {
00665
00666 TreeMemPtrMax=0;
00667 TreeMemPtr=0;
00668 TreeMem=NULL;
00669
00670 length=0;
00671 trees=NULL;
00672
00673 NUM_SYMS=4;
00674 }
00675
00676 template <class Trie>
00677 CTrie<Trie>::CTrie(int32_t d, bool p_use_compact_terminal_nodes)
00678 : CSGObject(), degree(d), position_weights(NULL),
00679 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
00680 weights_in_tree(true)
00681 {
00682 TreeMemPtrMax=1024*1024/sizeof(Trie);
00683 TreeMemPtr=0;
00684 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00685
00686 length=0;
00687 trees=NULL;
00688
00689 NUM_SYMS=4;
00690 }
00691
00692 template <class Trie>
00693 CTrie<Trie>::CTrie(const CTrie & to_copy)
00694 : CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
00695 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
00696 {
00697 if (to_copy.position_weights!=NULL)
00698 {
00699 position_weights = to_copy.position_weights;
00700
00701
00702
00703 }
00704 else
00705 position_weights=NULL;
00706
00707 TreeMemPtrMax=to_copy.TreeMemPtrMax;
00708 TreeMemPtr=to_copy.TreeMemPtr;
00709 TreeMem=SG_MALLOC(Trie, TreeMemPtrMax);
00710 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie));
00711
00712 length=to_copy.length;
00713 trees=SG_MALLOC(int32_t, length);
00714 for (int32_t i=0; i<length; i++)
00715 trees[i]=to_copy.trees[i];
00716
00717 NUM_SYMS=4;
00718 }
00719
00720 template <class Trie>
00721 const CTrie<Trie> &CTrie<Trie>::operator=(const CTrie<Trie> & to_copy)
00722 {
00723 degree=to_copy.degree ;
00724 use_compact_terminal_nodes=to_copy.use_compact_terminal_nodes ;
00725
00726 SG_FREE(position_weights);
00727 position_weights=NULL ;
00728 if (to_copy.position_weights!=NULL)
00729 {
00730 position_weights=to_copy.position_weights ;
00731
00732
00733
00734 }
00735 else
00736 position_weights=NULL ;
00737
00738 TreeMemPtrMax=to_copy.TreeMemPtrMax ;
00739 TreeMemPtr=to_copy.TreeMemPtr ;
00740 SG_FREE(TreeMem) ;
00741 TreeMem = SG_MALLOC(Trie, TreeMemPtrMax);
00742 memcpy(TreeMem, to_copy.TreeMem, TreeMemPtrMax*sizeof(Trie)) ;
00743
00744 length = to_copy.length ;
00745 if (trees)
00746 SG_FREE(trees);
00747 trees=SG_MALLOC(int32_t, length);
00748 for (int32_t i=0; i<length; i++)
00749 trees[i]=to_copy.trees[i] ;
00750
00751 return *this ;
00752 }
00753
00754 template <class Trie>
00755 int32_t CTrie<Trie>::find_deepest_node(
00756 int32_t start_node, int32_t& deepest_node) const
00757 {
00758 #ifdef TRIE_CHECK_EVERYTHING
00759 int32_t ret=0 ;
00760 SG_DEBUG("start_node=%i\n", start_node) ;
00761
00762 if (start_node==NO_CHILD)
00763 {
00764 for (int32_t i=0; i<length; i++)
00765 {
00766 int32_t my_deepest_node ;
00767 int32_t depth=find_deepest_node(i, my_deepest_node) ;
00768 SG_DEBUG("start_node %i depth=%i\n", i, depth) ;
00769 if (depth>ret)
00770 {
00771 deepest_node=my_deepest_node ;
00772 ret=depth ;
00773 }
00774 }
00775 return ret ;
00776 }
00777
00778 if (TreeMem[start_node].has_seq)
00779 {
00780 for (int32_t q=0; q<16; q++)
00781 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
00782 ret++ ;
00783 deepest_node=start_node ;
00784 return ret ;
00785 }
00786 if (TreeMem[start_node].has_floats)
00787 {
00788 deepest_node=start_node ;
00789 return 1 ;
00790 }
00791
00792 for (int32_t q=0; q<4; q++)
00793 {
00794 int32_t my_deepest_node ;
00795 if (TreeMem[start_node].children[q]==NO_CHILD)
00796 continue ;
00797 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
00798 if (depth>ret)
00799 {
00800 deepest_node=my_deepest_node ;
00801 ret=depth ;
00802 }
00803 }
00804 return ret ;
00805 #else
00806 SG_ERROR( "not implemented\n") ;
00807 return 0 ;
00808 #endif
00809 }
00810
00811 template <class Trie>
00812 int32_t CTrie<Trie>::compact_nodes(
00813 int32_t start_node, int32_t depth, float64_t * weights)
00814 {
00815 SG_ERROR( "code buggy\n") ;
00816
00817 int32_t ret=0 ;
00818
00819 if (start_node==NO_CHILD)
00820 {
00821 for (int32_t i=0; i<length; i++)
00822 compact_nodes(i,1, weights) ;
00823 return 0 ;
00824 }
00825 if (start_node<0)
00826 return -1 ;
00827
00828 if (depth==degree-1)
00829 {
00830 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
00831 int32_t num_used=0 ;
00832 for (int32_t q=0; q<4; q++)
00833 if (TreeMem[start_node].child_weights[q]!=0.0)
00834 num_used++ ;
00835 if (num_used>1)
00836 return -1 ;
00837 return 1 ;
00838 }
00839 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
00840
00841 int32_t num_used = 0 ;
00842 int32_t q_used=-1 ;
00843
00844 for (int32_t q=0; q<4; q++)
00845 {
00846 if (TreeMem[start_node].children[q]==NO_CHILD)
00847 continue ;
00848 num_used++ ;
00849 q_used=q ;
00850 }
00851 if (num_used>1)
00852 {
00853 if (depth>=degree-2)
00854 return -1 ;
00855 for (int32_t q=0; q<4; q++)
00856 {
00857 if (TreeMem[start_node].children[q]==NO_CHILD)
00858 continue ;
00859 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
00860 if (num<=2)
00861 continue ;
00862 int32_t node=get_node() ;
00863
00864 int32_t last_node=TreeMem[start_node].children[q] ;
00865 if (weights_in_tree)
00866 {
00867 ASSERT(weights[depth]!=0.0) ;
00868 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
00869 }
00870 else
00871 TreeMem[node].weight=TreeMem[last_node].weight ;
00872
00873 #ifdef TRIE_CHECK_EVERYTHING
00874 TreeMem[node].has_seq=true ;
00875 #endif
00876 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
00877 for (int32_t n=0; n<num; n++)
00878 {
00879 ASSERT(depth+n+1<=degree-1) ;
00880 ASSERT(last_node!=NO_CHILD) ;
00881 if (depth+n+1==degree-1)
00882 {
00883 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
00884 int32_t k ;
00885 for (k=0; k<4; k++)
00886 if (TreeMem[last_node].child_weights[k]!=0.0)
00887 break ;
00888 if (k==4)
00889 break ;
00890 TreeMem[node].seq[n]=k ;
00891 break ;
00892 }
00893 else
00894 {
00895 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
00896 int32_t k ;
00897 for (k=0; k<4; k++)
00898 if (TreeMem[last_node].children[k]!=NO_CHILD)
00899 break ;
00900 if (k==4)
00901 break ;
00902 TreeMem[node].seq[n]=k ;
00903 last_node=TreeMem[last_node].children[k] ;
00904 }
00905 }
00906 TreeMem[start_node].children[q]=-node ;
00907 }
00908 return -1 ;
00909 }
00910 if (num_used==0)
00911 return 0 ;
00912
00913 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
00914 if (ret<0)
00915 return ret ;
00916 return ret+1 ;
00917 }
00918
00919
00920 template <class Trie>
00921 bool CTrie<Trie>::compare_traverse(
00922 int32_t node, const CTrie<Trie> & other, int32_t other_node)
00923 {
00924 SG_DEBUG("checking nodes %i and %i\n", node, other_node) ;
00925 if (fabs(TreeMem[node].weight-other.TreeMem[other_node].weight)>=1e-5)
00926 {
00927 SG_DEBUG( "CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.TreeMem[other_node].weight) ;
00928 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00929 display_node(node) ;
00930 SG_DEBUG( "============================================================\n") ;
00931 other.display_node(other_node) ;
00932 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00933 return false ;
00934 }
00935
00936 #ifdef TRIE_CHECK_EVERYTHING
00937 if (TreeMem[node].has_seq!=other.TreeMem[other_node].has_seq)
00938 {
00939 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.TreeMem[other_node].has_seq) ;
00940 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00941 display_node(node) ;
00942 SG_DEBUG( "============================================================\n") ;
00943 other.display_node(other_node) ;
00944 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00945 return false ;
00946 }
00947 if (TreeMem[node].has_floats!=other.TreeMem[other_node].has_floats)
00948 {
00949 SG_DEBUG( "CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.TreeMem[other_node].has_floats) ;
00950 return false ;
00951 }
00952 if (other.TreeMem[other_node].has_floats)
00953 {
00954 for (int32_t q=0; q<4; q++)
00955 if (fabs(TreeMem[node].child_weights[q]-other.TreeMem[other_node].child_weights[q])>1e-5)
00956 {
00957 SG_DEBUG( "CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.TreeMem[other_node].child_weights[q]) ;
00958 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00959 display_node(node) ;
00960 SG_DEBUG( "============================================================\n") ;
00961 other.display_node(other_node) ;
00962 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00963 return false ;
00964 }
00965 }
00966 if (other.TreeMem[other_node].has_seq)
00967 {
00968 for (int32_t q=0; q<16; q++)
00969 if ((TreeMem[node].seq[q]!=other.TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.TreeMem[other_node].seq[q]<4)))
00970 {
00971 SG_DEBUG( "CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.TreeMem[other_node].seq[q]) ;
00972 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00973 display_node(node) ;
00974 SG_DEBUG( "============================================================\n") ;
00975 other.display_node(other_node) ;
00976 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00977 return false ;
00978 }
00979 }
00980 if (!other.TreeMem[other_node].has_seq && !other.TreeMem[other_node].has_floats)
00981 {
00982 for (int32_t q=0; q<4; q++)
00983 {
00984 if ((TreeMem[node].children[q]==NO_CHILD) && (other.TreeMem[other_node].children[q]==NO_CHILD))
00985 continue ;
00986 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.TreeMem[other_node].children[q]==NO_CHILD))
00987 {
00988 SG_DEBUG( "CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.TreeMem[other_node].children[q]) ;
00989 SG_DEBUG( ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
00990 display_node(node) ;
00991 SG_DEBUG( "============================================================\n") ;
00992 other.display_node(other_node) ;
00993 SG_DEBUG( "<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
00994 return false ;
00995 }
00996 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.TreeMem[other_node].children[q])))
00997 return false ;
00998 }
00999 }
01000 #else
01001 SG_ERROR( "not implemented\n") ;
01002 #endif
01003
01004 return true ;
01005 }
01006
01007 template <class Trie>
01008 bool CTrie<Trie>::compare(const CTrie<Trie> & other)
01009 {
01010 bool ret=true ;
01011 for (int32_t i=0; i<length; i++)
01012 if (!compare_traverse(trees[i], other, other.trees[i]))
01013 return false ;
01014 else
01015 SG_DEBUG("two tries at %i identical\n", i) ;
01016
01017 return ret ;
01018 }
01019
01020 template <class Trie>
01021 bool CTrie<Trie>::find_node(
01022 int32_t node, int32_t * trace, int32_t& trace_len) const
01023 {
01024 #ifdef TRIE_CHECK_EVERYTHING
01025 ASSERT(trace_len-1>=0) ;
01026 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
01027 if (TreeMem[trace[trace_len-1]].has_seq)
01028 return false ;
01029 if (TreeMem[trace[trace_len-1]].has_floats)
01030 return false ;
01031
01032 for (int32_t q=0; q<4; q++)
01033 {
01034 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
01035 continue ;
01036 int32_t tl=trace_len+1 ;
01037 if (TreeMem[trace[trace_len-1]].children[q]>=0)
01038 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
01039 else
01040 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
01041
01042 if (trace[trace_len]==node)
01043 {
01044 trace_len=tl ;
01045 return true ;
01046 }
01047 if (find_node(node, trace, tl))
01048 {
01049 trace_len=tl ;
01050 return true ;
01051 }
01052 }
01053 trace_len=0 ;
01054 return false ;
01055 #else
01056 SG_ERROR( "not implemented\n") ;
01057 return false ;
01058 #endif
01059 }
01060
01061 template <class Trie>
01062 void CTrie<Trie>::display_node(int32_t node) const
01063 {
01064 #ifdef TRIE_CHECK_EVERYTHING
01065 int32_t * trace=SG_MALLOC(int32_t, 2*degree);
01066 int32_t trace_len=-1 ;
01067 bool found = false ;
01068 int32_t tree=-1 ;
01069 for (tree=0; tree<length; tree++)
01070 {
01071 trace[0]=trees[tree] ;
01072 trace_len=1 ;
01073 found=find_node(node, trace, trace_len) ;
01074 if (found)
01075 break ;
01076 }
01077 ASSERT(found) ;
01078 SG_PRINT( "position %i trace: ", tree) ;
01079
01080 for (int32_t i=0; i<trace_len-1; i++)
01081 {
01082 int32_t branch=-1 ;
01083 for (int32_t q=0; q<4; q++)
01084 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
01085 {
01086 branch=q;
01087 break ;
01088 }
01089 ASSERT(branch!=-1) ;
01090 char acgt[5]="ACGT" ;
01091 SG_PRINT( "%c", acgt[branch]) ;
01092 }
01093 SG_PRINT( "\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
01094 if (TreeMem[node].has_floats)
01095 {
01096 for (int32_t q=0; q<4; q++)
01097 SG_PRINT( "child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
01098 }
01099 if (TreeMem[node].has_seq)
01100 {
01101 for (int32_t q=0; q<16; q++)
01102 SG_PRINT( "seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
01103 }
01104 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
01105 {
01106 for (int32_t q=0; q<4; q++)
01107 {
01108 if (TreeMem[node].children[q]!=NO_CHILD)
01109 {
01110 SG_PRINT( "children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
01111 display_node(abs(TreeMem[node].children[q])) ;
01112 }
01113 else
01114 SG_PRINT( "children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
01115 }
01116
01117 }
01118
01119 SG_FREE(trace);
01120 #else
01121 SG_ERROR( "not implemented\n") ;
01122 #endif
01123 }
01124
01125
01126 template <class Trie> CTrie<Trie>::~CTrie()
01127 {
01128 destroy() ;
01129
01130 SG_FREE(TreeMem) ;
01131 }
01132
01133 template <class Trie> void CTrie<Trie>::destroy()
01134 {
01135 if (trees!=NULL)
01136 {
01137 delete_trees();
01138 for (int32_t i=0; i<length; i++)
01139 trees[i] = NO_CHILD;
01140 SG_FREE(trees);
01141
01142 TreeMemPtr=0;
01143 length=0;
01144 trees=NULL;
01145 }
01146 }
01147
01148 template <class Trie> void CTrie<Trie>::set_degree(int32_t d)
01149 {
01150 delete_trees(get_use_compact_terminal_nodes());
01151 degree=d;
01152 }
01153
01154 template <class Trie> void CTrie<Trie>::create(
01155 int32_t len, bool p_use_compact_terminal_nodes)
01156 {
01157 destroy();
01158
01159 trees=SG_MALLOC(int32_t, len);
01160 TreeMemPtr=0 ;
01161 for (int32_t i=0; i<len; i++)
01162 trees[i]=get_node(degree==1);
01163 length = len ;
01164
01165 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01166 }
01167
01168
01169 template <class Trie> void CTrie<Trie>::delete_trees(
01170 bool p_use_compact_terminal_nodes)
01171 {
01172 if (trees==NULL)
01173 return;
01174
01175 TreeMemPtr=0 ;
01176 for (int32_t i=0; i<length; i++)
01177 trees[i]=get_node(degree==1);
01178
01179 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
01180 }
01181
01182 template <class Trie>
01183 float64_t CTrie<Trie>::compute_abs_weights_tree(int32_t tree, int32_t depth)
01184 {
01185 float64_t ret=0 ;
01186
01187 if (tree==NO_CHILD)
01188 return 0 ;
01189 TRIE_ASSERT(tree>=0) ;
01190
01191 if (depth==degree-2)
01192 {
01193 ret+=(TreeMem[tree].weight) ;
01194
01195 for (int32_t k=0; k<4; k++)
01196 ret+=(TreeMem[tree].child_weights[k]) ;
01197
01198 return ret ;
01199 }
01200
01201 ret+=(TreeMem[tree].weight) ;
01202
01203 for (int32_t i=0; i<4; i++)
01204 if (TreeMem[tree].children[i]!=NO_CHILD)
01205 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
01206
01207 return ret ;
01208 }
01209
01210
01211 template <class Trie>
01212 float64_t *CTrie<Trie>::compute_abs_weights(int32_t &len)
01213 {
01214 float64_t * sum=SG_MALLOC(float64_t, length*4);
01215 for (int32_t i=0; i<length*4; i++)
01216 sum[i]=0 ;
01217 len=length ;
01218
01219 for (int32_t i=0; i<length; i++)
01220 {
01221 TRIE_ASSERT(trees[i]!=NO_CHILD) ;
01222 for (int32_t k=0; k<4; k++)
01223 {
01224 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
01225 }
01226 }
01227
01228 return sum ;
01229 }
01230
01231 template <class Trie>
01232 void CTrie<Trie>::add_example_to_tree_mismatch_recursion(
01233 int32_t tree, int32_t i, float64_t alpha,
01234 int32_t *vec, int32_t len_rem,
01235 int32_t degree_rec, int32_t mismatch_rec,
01236 int32_t max_mismatch, float64_t * weights)
01237 {
01238 if (tree==NO_CHILD)
01239 tree=trees[i] ;
01240 TRIE_ASSERT(tree!=NO_CHILD) ;
01241
01242 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
01243 return ;
01244 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
01245
01246 int32_t subtree = NO_CHILD ;
01247
01248 if (degree_rec==degree-1)
01249 {
01250 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01251 if (weights_in_tree)
01252 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
01253 else
01254 if (weights[degree_rec]!=0.0)
01255 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01256 if (mismatch_rec+1<=max_mismatch)
01257 for (int32_t o=0; o<3; o++)
01258 {
01259 if (weights_in_tree)
01260 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01261 else
01262 if (weights[degree_rec]!=0.0)
01263 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01264 }
01265 return ;
01266 }
01267 else
01268 {
01269 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01270 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
01271 {
01272 subtree=TreeMem[tree].children[vec[0]] ;
01273 if (weights_in_tree)
01274 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
01275 else
01276 if (weights[degree_rec]!=0.0)
01277 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
01278 }
01279 else
01280 {
01281 int32_t tmp = get_node(degree_rec==degree-2);
01282 ASSERT(tmp>=0) ;
01283 TreeMem[tree].children[vec[0]]=tmp ;
01284 subtree=tmp ;
01285 #ifdef TRIE_CHECK_EVERYTHING
01286 if (degree_rec==degree-2)
01287 TreeMem[subtree].has_floats=true ;
01288 #endif
01289 if (weights_in_tree)
01290 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
01291 else
01292 if (weights[degree_rec]!=0.0)
01293 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
01294 else
01295 TreeMem[subtree].weight = 0.0 ;
01296 }
01297 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01298 &vec[1], len_rem-1,
01299 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
01300
01301 if (mismatch_rec+1<=max_mismatch)
01302 {
01303 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01304 for (int32_t o=0; o<3; o++)
01305 {
01306 int32_t ot = other[vec[0]][o] ;
01307 if (TreeMem[tree].children[ot]!=NO_CHILD)
01308 {
01309 subtree=TreeMem[tree].children[ot] ;
01310 if (weights_in_tree)
01311 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
01312 else
01313 if (weights[degree_rec]!=0.0)
01314 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
01315 }
01316 else
01317 {
01318 int32_t tmp = get_node(degree_rec==degree-2);
01319 ASSERT(tmp>=0) ;
01320 TreeMem[tree].children[ot]=tmp ;
01321 subtree=tmp ;
01322 #ifdef TRIE_CHECK_EVERYTHING
01323 if (degree_rec==degree-2)
01324 TreeMem[subtree].has_floats=true ;
01325 #endif
01326
01327 if (weights_in_tree)
01328 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
01329 else
01330 if (weights[degree_rec]!=0.0)
01331 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
01332 else
01333 TreeMem[subtree].weight = 0.0 ;
01334 }
01335
01336 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
01337 &vec[1], len_rem-1,
01338 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
01339 }
01340 }
01341 }
01342 }
01343
01344 template <class Trie>
01345 void CTrie<Trie>::compute_scoring_helper(
01346 int32_t tree, int32_t i, int32_t j, float64_t weight, int32_t d,
01347 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
01348 int32_t offs, float64_t* result)
01349 {
01350 if (i+j<num_feat)
01351 {
01352 float64_t decay=1.0;
01353
01354
01355
01356 if (j<degree-1)
01357 {
01358 for (int32_t k=0; k<num_sym; k++)
01359 {
01360 if (TreeMem[tree].children[k]!=NO_CHILD)
01361 {
01362 int32_t child=TreeMem[tree].children[k];
01363
01364 if (d<max_degree-1)
01365 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01366 else
01367 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
01368
01370 if (d==0)
01371 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
01372 }
01373 }
01374 }
01375 else if (j==degree-1)
01376 {
01377 for (int32_t k=0; k<num_sym; k++)
01378 {
01379
01380 if (d<max_degree-1 && i<num_feat-1)
01381 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
01382 else
01383 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
01384 }
01385 }
01386 }
01387 }
01388
01389 template <class Trie>
01390 void CTrie<Trie>::traverse(
01391 int32_t tree, const int32_t p, struct TreeParseInfo info,
01392 const int32_t depth, int32_t* const x, const int32_t k)
01393 {
01394 const int32_t num_sym = info.num_sym;
01395 const int32_t y0 = info.y0;
01396 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
01397
01398
01399
01400
01401
01402 int32_t sym;
01403 ASSERT( depth < degree );
01404
01405 if (depth<degree-1)
01406 {
01407 for( sym=0; sym<num_sym; ++sym ) {
01408 const int32_t childNum = TreeMem[tree].children[ sym ];
01409 if( childNum != NO_CHILD ) {
01410 int32_t child = childNum ;
01411 x[depth] = sym;
01412 info.substrs[depth+1] = y0 + sym;
01413 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01414
01415 count( TreeMem[child].weight, depth, info, p, x, k );
01416 traverse( child, p, info, depth+1, x, k );
01417 x[depth] = -1;
01418 }
01419 }
01420 }
01421 else if( depth == degree-1 )
01422 {
01423 for( sym=0; sym<num_sym; ++sym ) {
01424 const float64_t w = TreeMem[tree].child_weights[ sym ];
01425 if( w != 0.0 ) {
01426 x[depth] = sym;
01427 info.substrs[depth+1] = y0 + sym;
01428 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
01429
01430 count( w, depth, info, p, x, k );
01431 x[depth] = -1;
01432 }
01433 }
01434 }
01435
01436
01437 }
01438
01439 template <class Trie>
01440 void CTrie<Trie>::count(
01441 const float64_t w, const int32_t depth, const struct TreeParseInfo info,
01442 const int32_t p, int32_t* x, const int32_t k)
01443 {
01444 ASSERT( fabs(w) < 1e10 );
01445 ASSERT( x[depth] >= 0 );
01446 ASSERT( x[depth+1] < 0 );
01447 if ( depth < k ) {
01448 return;
01449 }
01450
01451 const int32_t nofKmers = info.nofsKmers[k];
01452 const float64_t margWeight = w * info.margFactors[ depth-k ];
01453 const int32_t m_a = depth - k + 1;
01454 const int32_t m_b = info.num_feat - p;
01455 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
01456
01457 const int32_t offset0 = nofKmers * p;
01458 register int32_t i;
01459 register int32_t offset;
01460 offset = offset0;
01461 for( i = 0; i < m; ++i ) {
01462 const int32_t y = info.substrs[i+k+1];
01463 info.C_k[ y + offset ] += margWeight;
01464 offset += nofKmers;
01465 }
01466 if( depth > k ) {
01467
01468 const int32_t offsR = info.substrs[k+1] + offset0;
01469 info.R_k[offsR] += margWeight;
01470
01471 if( p+depth-k < info.num_feat ) {
01472 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
01473 info.L_k[offsL] += margWeight;
01474 }
01475 }
01476
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489
01490
01491 }
01492
01493 template <class Trie>
01494 void CTrie<Trie>::add_to_trie(
01495 int32_t i, int32_t seq_offset, int32_t * vec, float32_t alpha,
01496 float64_t *weights, bool degree_times_position_weights)
01497 {
01498 int32_t tree = trees[i] ;
01499
01500
01501 int32_t max_depth = 0 ;
01502 float64_t* weights_column ;
01503 if (degree_times_position_weights)
01504 weights_column = &weights[(i+seq_offset)*degree] ;
01505 else
01506 weights_column = weights ;
01507
01508 if (weights_in_tree)
01509 {
01510 for (int32_t j=0; (j<degree) && (i+j<length); j++)
01511 if (CMath::abs(weights_column[j]*alpha)>0)
01512 max_depth = j+1 ;
01513 }
01514 else
01515
01516 max_depth=degree ;
01517
01518 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
01519 {
01520 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
01521 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
01522 {
01523 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
01524 {
01525
01526 TRIE_ASSERT(j >= degree-16) ;
01527
01528 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01529 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01530 int32_t node = - TreeMem[tree].children[vec[i+j+seq_offset]] ;
01531
01532 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
01533 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
01534 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
01535
01536
01537 int32_t mismatch_pos = -1 ;
01538 {
01539 int32_t k ;
01540 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
01541 {
01542 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01543
01544 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
01545 fprintf(stderr, "+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
01546 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
01547 TRIE_ASSERT(k<16) ;
01548 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
01549 {
01550 mismatch_pos=k ;
01551 break ;
01552 }
01553 }
01554 }
01555
01556
01557
01558 if (mismatch_pos==-1)
01559
01560 TreeMem[node].weight+=alpha ;
01561 else
01562
01563
01564
01565
01566 {
01567
01568 int32_t last_node=tree ;
01569
01570
01571 int32_t k ;
01572 for (k=0; k<mismatch_pos; k++)
01573 {
01574 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
01575 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
01576
01577 int32_t tmp=get_node();
01578 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
01579 last_node=tmp ;
01580 if (weights_in_tree)
01581 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
01582 else
01583 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
01584 TRIE_ASSERT(j+k!=degree-1) ;
01585 }
01586 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
01587 fprintf(stderr, "**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
01588 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
01589 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
01590
01591 if (j+k==degree-1)
01592 {
01593
01594
01595
01596 for (int32_t q=0; q<4; q++)
01597 TreeMem[last_node].child_weights[q]=0.0 ;
01598 if (weights_in_tree)
01599 {
01600 if (TreeMem[node].seq[mismatch_pos]<4)
01601 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
01602 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
01603 }
01604 else
01605 {
01606 if (TreeMem[node].seq[mismatch_pos]<4)
01607 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
01608 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
01609 }
01610
01611 #ifdef TRIE_CHECK_EVERYTHING
01612 TreeMem[last_node].has_floats=true ;
01613 #endif
01614 }
01615 else
01616 {
01617
01618 if (TreeMem[node].seq[mismatch_pos]<4)
01619 {
01620 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
01621
01622
01623 for (int32_t q=0; q<16; q++)
01624 {
01625 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
01626 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
01627 else
01628 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
01629 }
01630 #ifdef TRIE_CHECK_EVERYTHING
01631 TreeMem[node].has_seq=true ;
01632 #endif
01633 }
01634
01635
01636 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
01637 int32_t tmp = get_node() ;
01638 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
01639 last_node=tmp ;
01640
01641 TreeMem[last_node].weight = alpha ;
01642 #ifdef TRIE_CHECK_EVERYTHING
01643 TreeMem[last_node].has_seq = true ;
01644 #endif
01645 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01646 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
01647 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
01648 }
01649 }
01650 break ;
01651 }
01652 else
01653 {
01654 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
01655 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01656 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01657 if (weights_in_tree)
01658 TreeMem[tree].weight += alpha*weights_column[j];
01659 else
01660 TreeMem[tree].weight += alpha ;
01661 }
01662 }
01663 else if (j==degree-1)
01664 {
01665
01666 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01667 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01668 if (weights_in_tree)
01669 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
01670 else
01671 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
01672
01673 break;
01674 }
01675 else
01676 {
01677 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
01678 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01679 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01680
01681 int32_t tmp = get_node((j==degree-2) && (!use_seq));
01682 if (use_seq)
01683 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
01684 else
01685 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
01686 tree=tmp ;
01687
01688 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
01689 #ifdef TRIE_CHECK_EVERYTHING
01690 TreeMem[tree].has_seq = use_seq ;
01691 #endif
01692 if (use_seq)
01693 {
01694 TreeMem[tree].weight = alpha ;
01695
01696 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
01697 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
01698 {
01699 TRIE_ASSERT(q<16) ;
01700 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
01701 }
01702 break ;
01703 }
01704 else
01705 {
01706 if (weights_in_tree)
01707 TreeMem[tree].weight = alpha*weights_column[j] ;
01708 else
01709 TreeMem[tree].weight = alpha ;
01710 #ifdef TRIE_CHECK_EVERYTHING
01711 if (j==degree-2)
01712 TreeMem[tree].has_floats = true ;
01713 #endif
01714 }
01715 }
01716 }
01717 }
01718
01719 template <class Trie>
01720 float64_t CTrie<Trie>::compute_by_tree_helper(
01721 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01722 int32_t weight_pos, float64_t* weights,
01723 bool degree_times_position_weights)
01724 {
01725 int32_t tree = trees[tree_pos] ;
01726
01727 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
01728 return 0.0;
01729
01730 float64_t *weights_column=NULL ;
01731 if (degree_times_position_weights)
01732 weights_column=&weights[weight_pos*degree] ;
01733 else
01734 weights_column=weights ;
01735
01736 float64_t sum=0 ;
01737 for (int32_t j=0; seq_pos+j < len; j++)
01738 {
01739 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
01740
01741 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01742 {
01743 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01744 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01745 {
01746 tree = - TreeMem[tree].children[vec[seq_pos+j]];
01747 TRIE_ASSERT(tree>=0) ;
01748 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01749 float64_t this_weight=0.0 ;
01750 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01751 {
01752 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
01753 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01754 break ;
01755 this_weight += weights_column[j+k] ;
01756 }
01757 sum += TreeMem[tree].weight * this_weight ;
01758 break ;
01759 }
01760 else
01761 {
01762 tree=TreeMem[tree].children[vec[seq_pos+j]];
01763 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01764 if (weights_in_tree)
01765 sum += TreeMem[tree].weight ;
01766 else
01767 sum += TreeMem[tree].weight * weights_column[j] ;
01768 } ;
01769 }
01770 else
01771 {
01772 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01773 if (j==degree-1)
01774 {
01775 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
01776 if (weights_in_tree)
01777 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01778 else
01779 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
01780 }
01781 else
01782 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
01783
01784 break;
01785 }
01786 }
01787
01788 if (position_weights!=NULL)
01789 return sum*position_weights[weight_pos] ;
01790 else
01791 return sum ;
01792 }
01793
01794 template <class Trie>
01795 void CTrie<Trie>::compute_by_tree_helper(
01796 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
01797 int32_t weight_pos, float64_t* LevelContrib, float64_t factor,
01798 int32_t mkl_stepsize, float64_t * weights,
01799 bool degree_times_position_weights)
01800 {
01801 int32_t tree = trees[tree_pos] ;
01802 if (factor==0)
01803 return ;
01804
01805 if (position_weights!=NULL)
01806 {
01807 factor *= position_weights[weight_pos] ;
01808 if (factor==0)
01809 return ;
01810 if (!degree_times_position_weights)
01811 {
01812 for (int32_t j=0; seq_pos+j<len; j++)
01813 {
01814 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01815 {
01816 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01817 {
01818 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01819 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01820 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01821 {
01822 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01823 break ;
01824 if (weights_in_tree)
01825 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01826 else
01827 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01828 }
01829 break ;
01830 }
01831 else
01832 {
01833 tree=TreeMem[tree].children[vec[seq_pos+j]];
01834 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01835 if (weights_in_tree)
01836 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01837 else
01838 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01839 }
01840 }
01841 else
01842 {
01843 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01844 if (j==degree-1)
01845 {
01846 if (weights_in_tree)
01847 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01848 else
01849 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01850 }
01851 }
01852 }
01853 }
01854 else
01855 {
01856 for (int32_t j=0; seq_pos+j<len; j++)
01857 {
01858 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01859 {
01860 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01861 {
01862 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01863 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01864 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01865 {
01866 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01867 break ;
01868 if (weights_in_tree)
01869 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01870 else
01871 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01872 }
01873 break ;
01874 }
01875 else
01876 {
01877 tree=TreeMem[tree].children[vec[seq_pos+j]];
01878 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01879 if (weights_in_tree)
01880 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
01881 else
01882 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
01883 }
01884 }
01885 else
01886 {
01887 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01888 if (j==degree-1)
01889 {
01890 if (weights_in_tree)
01891 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01892 else
01893 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
01894 }
01895 break ;
01896 }
01897 }
01898 }
01899 }
01900 else if (!degree_times_position_weights)
01901 {
01902 for (int32_t j=0; seq_pos+j<len; j++)
01903 {
01904 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01905 {
01906 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01907 {
01908 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01909 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01910 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01911 {
01912 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01913 break ;
01914 if (weights_in_tree)
01915 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01916 else
01917 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
01918 }
01919 break ;
01920 }
01921 else
01922 {
01923 tree=TreeMem[tree].children[vec[seq_pos+j]];
01924 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01925 if (weights_in_tree)
01926 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
01927 else
01928 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
01929 }
01930 }
01931 else
01932 {
01933 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01934 if (j==degree-1)
01935 {
01936 if (weights_in_tree)
01937 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
01938 else
01939 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
01940 }
01941 break ;
01942 }
01943 }
01944 }
01945 else
01946 {
01947
01948
01949
01950
01951
01952
01953
01954
01955
01956
01957
01958
01959
01960
01961
01962
01963
01964
01965 for (int32_t j=0; seq_pos+j<len; j++)
01966 {
01967 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
01968 {
01969 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
01970 {
01971 tree = -TreeMem[tree].children[vec[seq_pos+j]];
01972 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
01973 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
01974 {
01975 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
01976 break ;
01977 if (weights_in_tree)
01978 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
01979 else
01980 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
01981 }
01982 break ;
01983 }
01984 else
01985 {
01986 tree=TreeMem[tree].children[vec[seq_pos+j]];
01987 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01988 if (weights_in_tree)
01989 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
01990 else
01991 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
01992 }
01993 }
01994 else
01995 {
01996 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
01997 if (j==degree-1)
01998 {
01999 if (weights_in_tree)
02000 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
02001 else
02002 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
02003 }
02004 break ;
02005 }
02006 }
02007 }
02008 }
02009
02010 template <class Trie>
02011 void CTrie<Trie>::fill_backtracking_table_recursion(
02012 Trie* tree, int32_t depth, uint64_t seq, float64_t value,
02013 DynArray<ConsensusEntry>* table, float64_t* weights)
02014 {
02015 float64_t w=1.0;
02016
02017 if (weights_in_tree || depth==0)
02018 value+=tree->weight;
02019 else
02020 {
02021 w=weights[degree-1];
02022 value+=weights[depth-1]*tree->weight;
02023 }
02024
02025 if (degree-1==depth)
02026 {
02027 for (int32_t sym=0; sym<4; sym++)
02028 {
02029 float64_t v=w*tree->child_weights[sym];
02030 if (v!=0.0)
02031 {
02032 ConsensusEntry entry;
02033 entry.bt=-1;
02034 entry.score=value+v;
02035 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02036
02037 table->append_element(entry);
02038 }
02039 }
02040 }
02041 else
02042 {
02043 for (int32_t sym=0; sym<4; sym++)
02044 {
02045 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
02046 if (tree->children[sym] != NO_CHILD)
02047 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
02048 }
02049 }
02050 }
02051
02052 template <class Trie>
02053 float64_t CTrie<Trie>::get_cumulative_score(
02054 int32_t pos, uint64_t seq, int32_t deg, float64_t* weights)
02055 {
02056 float64_t result=0.0;
02057
02058
02059
02060 for (int32_t i=pos; i<pos+deg && i<length; i++)
02061 {
02062
02063 Trie* tree = &TreeMem[trees[i]];
02064
02065 for (int32_t d=0; d<deg-i+pos; d++)
02066 {
02067
02068 ASSERT(d-1<degree);
02069 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
02070
02071 float64_t w=1.0;
02072 if (!weights_in_tree)
02073 w=weights[d];
02074
02075 ASSERT(tree->children[sym] != NO_CHILD);
02076 tree=&TreeMem[tree->children[sym]];
02077 result+=w*tree->weight;
02078 }
02079 }
02080
02081 return result;
02082 }
02083
02084 template <class Trie>
02085 void CTrie<Trie>::fill_backtracking_table(
02086 int32_t pos, DynArray<ConsensusEntry>* prev,
02087 DynArray<ConsensusEntry>* cur, bool cumulative, float64_t* weights)
02088 {
02089 ASSERT(pos>=0 && pos<length);
02090 ASSERT(!use_compact_terminal_nodes);
02091
02092 Trie* t = &TreeMem[trees[pos]];
02093
02094 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
02095
02096
02097 if (cumulative)
02098 {
02099 int32_t num_cur=cur->get_num_elements();
02100 for (int32_t i=0; i<num_cur; i++)
02101 {
02102 ConsensusEntry entry=cur->get_element(i);
02103 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
02104 cur->set_element(entry,i);
02105
02106 }
02107 }
02108
02109
02110
02111 if (prev)
02112 {
02113 int32_t num_cur=cur->get_num_elements();
02114 int32_t num_prev=prev->get_num_elements();
02115
02116 for (int32_t i=0; i<num_cur; i++)
02117 {
02118
02119 uint64_t str_cur= cur->get_element(i).string >> 2;
02120
02121
02122 int32_t bt=-1;
02123 float64_t max_score=0.0;
02124
02125 for (int32_t j=0; j<num_prev; j++)
02126 {
02127
02128 uint64_t mask=
02129 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
02130 uint64_t str_prev= mask & prev->get_element(j).string;
02131
02132
02133 if (str_cur == str_prev)
02134 {
02135 float64_t sc=prev->get_element(j).score+cur->get_element(i).score;
02136 if (bt==-1 || sc>max_score)
02137 {
02138 bt=j;
02139 max_score=sc;
02140
02141
02142 }
02143 }
02144 }
02145
02146 ASSERT(bt!=-1);
02147 ConsensusEntry entry;
02148 entry.bt=bt;
02149 entry.score=max_score;
02150 entry.string=cur->get_element(i).string;
02151 cur->set_element(entry, i);
02152
02153 }
02154 }
02155 }
02156 }
02157 #endif // _TRIE_H___