24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 #define NO_CHILD ((int32_t)-1073741824)
29 #define WEIGHTS_IN_TRIE
32 #ifdef TRIE_CHECK_EVERYTHING
33 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
35 #define TRIE_ASSERT_EVERYTHING(x)
39 #define TRIE_ASSERT(x)
41 #define TRIE_TERMINAL_CHARACTER 7
59 #ifdef TRIE_CHECK_EVERYTHING
88 #ifdef TRIE_CHECK_EVERYTHING
106 struct TreeParseInfo {
133 #endif // DOXYGEN_SHOULD_SKIP_THIS
135 template <
class Trie>
class CTrie;
137 #define IGNORE_IN_CLASSLIST
168 CTrie(int32_t d,
bool p_use_compact_terminal_nodes=
true);
185 int32_t
node,
const CTrie & other, int32_t other_node);
200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len)
const;
209 int32_t start_node, int32_t &deepest_node)
const;
232 void create(int32_t len,
bool p_use_compact_terminal_nodes=
true);
239 void delete_trees(
bool p_use_compact_terminal_nodes=
true);
252 int32_t i, int32_t seq_offset, int32_t* vec,
float32_t alpha,
253 float64_t *weights,
bool degree_times_position_weights);
283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
285 bool degree_times_position_weights) ;
302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
304 int32_t mkl_stepsize,
float64_t * weights,
305 bool degree_times_position_weights);
322 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
323 int32_t max_degree, int32_t num_feat, int32_t num_sym,
324 int32_t sym_offset, int32_t offs,
float64_t* result);
339 int32_t tree, int32_t i,
float64_t alpha, int32_t *vec,
340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
341 int32_t max_mismatch,
float64_t * weights);
353 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
354 const int32_t depth, int32_t*
const x,
const int32_t k);
367 const struct TreeParseInfo info,
const int32_t p, int32_t* x,
387 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights);
399 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
438 const int32_t parentIdx,
const int32_t sym,
const int32_t depth,
448 float64_t*
const*
const poims,
const int32_t K,
449 const int32_t debug);
466 bool p_use_compact_terminal_nodes)
500 for (int32_t q=0; q<4; q++)
501 TreeMem[ret].child_weights[q]=0.0;
505 for (int32_t q=0; q<4; q++)
506 TreeMem[ret].children[q]=NO_CHILD;
508 #ifdef TRIE_CHECK_EVERYTHING
510 TreeMem[ret].has_floats=false ;
521 SG_DEBUG(
"Extending TreeMem from %i to %i elements\n",
524 TreeMemPtrMax = (int32_t) ((
float64_t)TreeMemPtrMax*1.2);
556 const int32_t nodeIdx,
const int32_t depth,
const int32_t offset,
557 const int32_t y0,
float64_t*
const*
const W,
const int32_t K);
572 const float64_t*
const distrib,
const int32_t i,
573 const int32_t nodeIdx, int32_t left_tries_idx[4],
574 const int32_t depth, int32_t
const lastSym,
float64_t* S,
589 const float64_t*
const distrib,
const int32_t i,
590 const int32_t nodeIdx, int32_t left_tries_idx[4],
604 const int32_t nodeIdx,
const int32_t depth,
const int32_t i,
605 const int32_t y0,
float64_t*
const*
const poims,
const int32_t K,
606 const int32_t debug);
622 float64_t*
const*
const poims,
const int32_t K,
const int32_t k,
623 const int32_t i,
const int32_t y,
const float64_t valW,
625 const int32_t debug);
628 virtual const char*
get_name()
const {
return "Trie"; }
660 template <
class Trie>
662 :
CSGObject(), degree(0), position_weights(NULL),
663 use_compact_terminal_nodes(false),
664 weights_in_tree(true)
677 template <
class Trie>
679 :
CSGObject(), degree(d), position_weights(NULL),
680 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
681 weights_in_tree(true)
693 template <
class Trie>
695 :
CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
696 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
715 for (int32_t i=0; i<
length; i++)
721 template <
class Trie>
727 SG_FREE(position_weights);
728 position_weights=NULL ;
737 position_weights=NULL ;
742 TreeMem = SG_MALLOC(Trie, TreeMemPtrMax);
743 memcpy(TreeMem, to_copy.
TreeMem, TreeMemPtrMax*
sizeof(Trie)) ;
748 trees=SG_MALLOC(int32_t, length);
749 for (int32_t i=0; i<length; i++)
750 trees[i]=to_copy.
trees[i] ;
755 template <
class Trie>
757 int32_t start_node, int32_t& deepest_node)
const
759 #ifdef TRIE_CHECK_EVERYTHING
761 SG_DEBUG(
"start_node=%i\n", start_node)
763 if (start_node==NO_CHILD)
765 for (int32_t i=0; i<length; i++)
767 int32_t my_deepest_node ;
768 int32_t depth=find_deepest_node(i, my_deepest_node) ;
769 SG_DEBUG(
"start_node %i depth=%i\n", i, depth)
772 deepest_node=my_deepest_node ;
779 if (TreeMem[start_node].has_seq)
781 for (int32_t q=0; q<16; q++)
782 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
784 deepest_node=start_node ;
787 if (TreeMem[start_node].has_floats)
789 deepest_node=start_node ;
793 for (int32_t q=0; q<4; q++)
795 int32_t my_deepest_node ;
796 if (TreeMem[start_node].children[q]==NO_CHILD)
798 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
801 deepest_node=my_deepest_node ;
812 template <
class Trie>
814 int32_t start_node, int32_t depth,
float64_t * weights)
820 if (start_node==NO_CHILD)
822 for (int32_t i=0; i<length; i++)
823 compact_nodes(i,1, weights) ;
831 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats)
833 for (int32_t q=0; q<4; q++)
834 if (TreeMem[start_node].child_weights[q]!=0.0)
840 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats)
842 int32_t num_used = 0 ;
845 for (int32_t q=0; q<4; q++)
847 if (TreeMem[start_node].children[q]==NO_CHILD)
856 for (int32_t q=0; q<4; q++)
858 if (TreeMem[start_node].children[q]==NO_CHILD)
860 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
863 int32_t
node=get_node() ;
865 int32_t last_node=TreeMem[start_node].children[q] ;
868 ASSERT(weights[depth]!=0.0)
869 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
872 TreeMem[node].weight=TreeMem[last_node].weight ;
874 #ifdef TRIE_CHECK_EVERYTHING
875 TreeMem[node].has_seq=true ;
877 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
878 for (int32_t n=0; n<num; n++)
880 ASSERT(depth+n+1<=degree-1)
881 ASSERT(last_node!=NO_CHILD)
882 if (depth+n+1==degree-1)
884 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats)
887 if (TreeMem[last_node].child_weights[k]!=0.0)
891 TreeMem[node].seq[n]=k ;
896 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats)
899 if (TreeMem[last_node].children[k]!=NO_CHILD)
903 TreeMem[node].seq[n]=k ;
904 last_node=TreeMem[last_node].children[k] ;
907 TreeMem[start_node].children[q]=-node ;
914 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
921 template <
class Trie>
925 SG_DEBUG(
"checking nodes %i and %i\n", node, other_node)
926 if (fabs(TreeMem[node].weight-other.
TreeMem[other_node].weight)>=1e-5)
928 SG_DEBUG(
"CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.
TreeMem[other_node].weight)
929 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
931 SG_DEBUG(
"============================================================\n")
933 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
937 #ifdef TRIE_CHECK_EVERYTHING
938 if (TreeMem[node].has_seq!=other.
TreeMem[other_node].has_seq)
940 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.
TreeMem[other_node].has_seq)
941 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
943 SG_DEBUG(
"============================================================\n")
945 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
948 if (TreeMem[node].has_floats!=other.
TreeMem[other_node].has_floats)
950 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.
TreeMem[other_node].has_floats)
953 if (other.
TreeMem[other_node].has_floats)
955 for (int32_t q=0; q<4; q++)
956 if (fabs(TreeMem[node].child_weights[q]-other.
TreeMem[other_node].child_weights[q])>1e-5)
958 SG_DEBUG(
"CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.
TreeMem[other_node].child_weights[q])
959 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
961 SG_DEBUG(
"============================================================\n")
963 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
967 if (other.
TreeMem[other_node].has_seq)
969 for (int32_t q=0; q<16; q++)
970 if ((TreeMem[node].seq[q]!=other.
TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.
TreeMem[other_node].seq[q]<4)))
972 SG_DEBUG(
"CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.
TreeMem[other_node].seq[q])
973 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
975 SG_DEBUG(
"============================================================\n")
977 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
981 if (!other.
TreeMem[other_node].has_seq && !other.
TreeMem[other_node].has_floats)
983 for (int32_t q=0; q<4; q++)
985 if ((TreeMem[node].children[q]==NO_CHILD) && (other.
TreeMem[other_node].children[q]==NO_CHILD))
987 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.
TreeMem[other_node].children[q]==NO_CHILD))
989 SG_DEBUG(
"CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.
TreeMem[other_node].children[q])
990 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n")
992 SG_DEBUG(
"============================================================\n")
994 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n")
997 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.
TreeMem[other_node].children[q])))
1008 template <
class Trie>
1012 for (int32_t i=0; i<length; i++)
1013 if (!compare_traverse(trees[i], other, other.
trees[i]))
1016 SG_DEBUG(
"two tries at %i identical\n", i)
1021 template <
class Trie>
1023 int32_t
node, int32_t * trace, int32_t& trace_len)
const
1025 #ifdef TRIE_CHECK_EVERYTHING
1027 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
1028 if (TreeMem[trace[trace_len-1]].has_seq)
1030 if (TreeMem[trace[trace_len-1]].has_floats)
1033 for (int32_t q=0; q<4; q++)
1035 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
1037 int32_t tl=trace_len+1 ;
1038 if (TreeMem[trace[trace_len-1]].children[q]>=0)
1039 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
1041 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
1043 if (trace[trace_len]==node)
1048 if (find_node(node, trace, tl))
1062 template <
class Trie>
1065 #ifdef TRIE_CHECK_EVERYTHING
1066 int32_t * trace=SG_MALLOC(int32_t, 2*degree);
1067 int32_t trace_len=-1 ;
1068 bool found = false ;
1070 for (tree=0; tree<length; tree++)
1072 trace[0]=trees[tree] ;
1074 found=find_node(node, trace, trace_len) ;
1079 SG_PRINT(
"position %i trace: ", tree)
1081 for (int32_t i=0; i<trace_len-1; i++)
1084 for (int32_t q=0; q<4; q++)
1085 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
1091 char acgt[5]=
"ACGT" ;
1094 SG_PRINT(
"\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats)
1095 if (TreeMem[node].has_floats)
1097 for (int32_t q=0; q<4; q++)
1098 SG_PRINT(
"child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q])
1100 if (TreeMem[node].has_seq)
1102 for (int32_t q=0; q<16; q++)
1103 SG_PRINT(
"seq[%i] = %i\n", q, TreeMem[node].seq[q])
1105 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
1107 for (int32_t q=0; q<4; q++)
1109 if (TreeMem[node].children[q]!=NO_CHILD)
1111 SG_PRINT(
"children[%i] = %i -> \n", q, TreeMem[node].children[q])
1112 display_node(abs(TreeMem[node].children[q])) ;
1115 SG_PRINT(
"children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q])
1139 for (int32_t i=0; i<length; i++)
1140 trees[i] = NO_CHILD;
1151 delete_trees(get_use_compact_terminal_nodes());
1156 int32_t len,
bool p_use_compact_terminal_nodes)
1160 trees=SG_MALLOC(int32_t, len);
1162 for (int32_t i=0; i<len; i++)
1163 trees[i]=get_node(degree==1);
1166 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
1171 bool p_use_compact_terminal_nodes)
1177 for (int32_t i=0; i<length; i++)
1178 trees[i]=get_node(degree==1);
1180 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
1183 template <
class Trie>
1190 TRIE_ASSERT(tree>=0)
1192 if (depth==degree-2)
1194 ret+=(TreeMem[tree].weight) ;
1196 for (int32_t k=0; k<4; k++)
1197 ret+=(TreeMem[tree].child_weights[k]) ;
1202 ret+=(TreeMem[tree].weight) ;
1204 for (int32_t i=0; i<4; i++)
1205 if (TreeMem[tree].children[i]!=NO_CHILD)
1206 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
1212 template <
class Trie>
1216 for (int32_t i=0; i<length*4; i++)
1220 for (int32_t i=0; i<length; i++)
1222 TRIE_ASSERT(trees[i]!=NO_CHILD)
1223 for (int32_t k=0; k<4; k++)
1225 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
1232 template <
class Trie>
1234 int32_t tree, int32_t i,
float64_t alpha,
1235 int32_t *vec, int32_t len_rem,
1236 int32_t degree_rec, int32_t mismatch_rec,
1237 int32_t max_mismatch,
float64_t * weights)
1241 TRIE_ASSERT(tree!=NO_CHILD)
1243 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
1245 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
1247 int32_t subtree = NO_CHILD ;
1249 if (degree_rec==degree-1)
1251 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
1252 if (weights_in_tree)
1253 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
1255 if (weights[degree_rec]!=0.0)
1256 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
1257 if (mismatch_rec+1<=max_mismatch)
1258 for (int32_t o=0; o<3; o++)
1260 if (weights_in_tree)
1261 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
1263 if (weights[degree_rec]!=0.0)
1264 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
1270 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1271 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
1273 subtree=TreeMem[tree].children[vec[0]] ;
1274 if (weights_in_tree)
1275 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
1277 if (weights[degree_rec]!=0.0)
1278 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
1282 int32_t tmp = get_node(degree_rec==degree-2);
1284 TreeMem[tree].children[vec[0]]=tmp ;
1286 #ifdef TRIE_CHECK_EVERYTHING
1287 if (degree_rec==degree-2)
1288 TreeMem[subtree].has_floats=true ;
1290 if (weights_in_tree)
1291 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
1293 if (weights[degree_rec]!=0.0)
1294 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
1296 TreeMem[subtree].weight = 0.0 ;
1298 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
1300 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
1302 if (mismatch_rec+1<=max_mismatch)
1304 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1305 for (int32_t o=0; o<3; o++)
1307 int32_t ot = other[vec[0]][o] ;
1308 if (TreeMem[tree].children[ot]!=NO_CHILD)
1310 subtree=TreeMem[tree].children[ot] ;
1311 if (weights_in_tree)
1312 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
1314 if (weights[degree_rec]!=0.0)
1315 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
1319 int32_t tmp = get_node(degree_rec==degree-2);
1321 TreeMem[tree].children[ot]=tmp ;
1323 #ifdef TRIE_CHECK_EVERYTHING
1324 if (degree_rec==degree-2)
1325 TreeMem[subtree].has_floats=true ;
1328 if (weights_in_tree)
1329 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
1331 if (weights[degree_rec]!=0.0)
1332 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
1334 TreeMem[subtree].weight = 0.0 ;
1337 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
1339 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
1345 template <
class Trie>
1347 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
1348 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
1359 for (int32_t k=0; k<num_sym; k++)
1361 if (TreeMem[tree].children[k]!=NO_CHILD)
1363 int32_t child=TreeMem[tree].children[k];
1366 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1368 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
1372 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
1376 else if (j==degree-1)
1378 for (int32_t k=0; k<num_sym; k++)
1381 if (d<max_degree-1 && i<num_feat-1)
1382 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1384 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
1390 template <
class Trie>
1392 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
1393 const int32_t depth, int32_t*
const x,
const int32_t k)
1395 const int32_t num_sym = info.num_sym;
1396 const int32_t y0 = info.y0;
1397 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
1408 for( sym=0; sym<num_sym; ++sym ) {
1409 const int32_t childNum = TreeMem[tree].children[ sym ];
1410 if( childNum != NO_CHILD ) {
1411 int32_t child = childNum ;
1413 info.substrs[depth+1] = y0 + sym;
1414 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1416 count( TreeMem[child].weight, depth, info, p, x, k );
1417 traverse( child, p, info, depth+1, x, k );
1422 else if( depth == degree-1 )
1424 for( sym=0; sym<num_sym; ++sym ) {
1425 const float64_t w = TreeMem[tree].child_weights[ sym ];
1428 info.substrs[depth+1] = y0 + sym;
1429 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1431 count( w, depth, info, p, x, k );
1440 template <
class Trie>
1442 const float64_t w,
const int32_t depth,
const struct TreeParseInfo info,
1443 const int32_t p, int32_t* x,
const int32_t k)
1452 const int32_t nofKmers = info.nofsKmers[k];
1453 const float64_t margWeight = w * info.margFactors[ depth-k ];
1454 const int32_t m_a = depth - k + 1;
1455 const int32_t m_b = info.num_feat - p;
1456 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
1458 const int32_t offset0 = nofKmers * p;
1460 register int32_t offset;
1462 for( i = 0; i < m; ++i ) {
1463 const int32_t y = info.substrs[i+k+1];
1464 info.C_k[ y + offset ] += margWeight;
1469 const int32_t offsR = info.substrs[k+1] + offset0;
1470 info.R_k[offsR] += margWeight;
1472 if( p+depth-k < info.num_feat ) {
1473 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
1474 info.L_k[offsL] += margWeight;
1494 template <
class Trie>
1496 int32_t i, int32_t seq_offset, int32_t * vec,
float32_t alpha,
1497 float64_t *weights,
bool degree_times_position_weights)
1499 int32_t tree = trees[i] ;
1502 int32_t max_depth = 0 ;
1504 if (degree_times_position_weights)
1505 weights_column = &weights[(i+seq_offset)*degree] ;
1507 weights_column = weights ;
1509 if (weights_in_tree)
1511 for (int32_t j=0; (j<degree) && (i+j<length); j++)
1519 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
1521 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4))
1522 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
1524 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
1527 TRIE_ASSERT(j >= degree-16)
1529 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1530 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1531 int32_t
node = - TreeMem[tree].
children[vec[i+j+seq_offset]] ;
1533 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax))
1534 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq)
1535 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats)
1538 int32_t mismatch_pos = -1 ;
1541 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
1543 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
1545 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
1546 fprintf(stderr,
"+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
1547 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER))
1549 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
1559 if (mismatch_pos==-1)
1561 TreeMem[node].weight+=alpha ;
1569 int32_t last_node=tree ;
1573 for (k=0; k<mismatch_pos; k++)
1575 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4))
1576 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k])
1578 int32_t tmp=get_node();
1579 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
1581 if (weights_in_tree)
1582 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
1584 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
1585 TRIE_ASSERT(j+k!=degree-1)
1587 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
1588 fprintf(stderr,
"**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
1589 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER))
1590 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos])
1597 for (int32_t q=0; q<4; q++)
1598 TreeMem[last_node].child_weights[q]=0.0 ;
1599 if (weights_in_tree)
1601 if (TreeMem[node].seq[mismatch_pos]<4)
1602 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
1603 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
1607 if (TreeMem[node].seq[mismatch_pos]<4)
1608 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
1609 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
1612 #ifdef TRIE_CHECK_EVERYTHING
1613 TreeMem[last_node].has_floats=true ;
1619 if (TreeMem[node].seq[mismatch_pos]<4)
1621 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
1624 for (int32_t q=0; q<16; q++)
1626 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
1627 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
1629 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
1631 #ifdef TRIE_CHECK_EVERYTHING
1632 TreeMem[node].has_seq=true ;
1637 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4))
1638 int32_t tmp = get_node() ;
1639 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
1642 TreeMem[last_node].weight = alpha ;
1643 #ifdef TRIE_CHECK_EVERYTHING
1644 TreeMem[last_node].has_seq = true ;
1646 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1647 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
1648 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
1655 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
1656 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax))
1657 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1658 if (weights_in_tree)
1659 TreeMem[tree].weight += alpha*weights_column[j];
1661 TreeMem[tree].weight += alpha ;
1664 else if (j==degree-1)
1667 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1668 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
1669 if (weights_in_tree)
1670 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
1672 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
1678 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
1679 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1680 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1682 int32_t tmp = get_node((j==degree-2) && (!use_seq));
1684 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
1686 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
1689 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax))
1690 #ifdef TRIE_CHECK_EVERYTHING
1691 TreeMem[tree].has_seq = use_seq ;
1695 TreeMem[tree].weight = alpha ;
1697 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1698 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
1701 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
1707 if (weights_in_tree)
1708 TreeMem[tree].weight = alpha*weights_column[j] ;
1710 TreeMem[tree].weight = alpha ;
1711 #ifdef TRIE_CHECK_EVERYTHING
1713 TreeMem[tree].has_floats = true ;
1720 template <
class Trie>
1722 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1724 bool degree_times_position_weights)
1726 int32_t tree = trees[tree_pos] ;
1728 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
1732 if (degree_times_position_weights)
1733 weights_column=&weights[weight_pos*degree] ;
1735 weights_column=weights ;
1738 for (int32_t j=0; seq_pos+j < len; j++)
1740 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0))
1742 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1744 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1745 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1747 tree = - TreeMem[tree].children[vec[seq_pos+j]];
1748 TRIE_ASSERT(tree>=0)
1749 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
1751 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1753 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0))
1754 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1756 this_weight += weights_column[j+k] ;
1758 sum += TreeMem[tree].weight * this_weight ;
1763 tree=TreeMem[tree].children[vec[seq_pos+j]];
1764 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1765 if (weights_in_tree)
1766 sum += TreeMem[tree].weight ;
1768 sum += TreeMem[tree].weight * weights_column[j] ;
1773 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1776 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats)
1777 if (weights_in_tree)
1778 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1780 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
1783 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats)
1789 if (position_weights!=NULL)
1790 return sum*position_weights[weight_pos] ;
1795 template <
class Trie>
1797 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1799 int32_t mkl_stepsize,
float64_t * weights,
1800 bool degree_times_position_weights)
1802 int32_t tree = trees[tree_pos] ;
1806 if (position_weights!=NULL)
1808 factor *= position_weights[weight_pos] ;
1811 if (!degree_times_position_weights)
1813 for (int32_t j=0; seq_pos+j<len; j++)
1815 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1817 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1819 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1820 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
1821 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1823 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1825 if (weights_in_tree)
1826 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1828 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
1834 tree=TreeMem[tree].children[vec[seq_pos+j]];
1835 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1836 if (weights_in_tree)
1837 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1839 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
1844 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1847 if (weights_in_tree)
1848 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1850 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1857 for (int32_t j=0; seq_pos+j<len; j++)
1859 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1861 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1863 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1864 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
1865 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1867 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1869 if (weights_in_tree)
1870 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1872 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
1878 tree=TreeMem[tree].children[vec[seq_pos+j]];
1879 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1880 if (weights_in_tree)
1881 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1883 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
1888 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1891 if (weights_in_tree)
1892 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1894 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
1901 else if (!degree_times_position_weights)
1903 for (int32_t j=0; seq_pos+j<len; j++)
1905 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1907 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1909 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1910 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
1911 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1913 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1915 if (weights_in_tree)
1916 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
1918 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
1924 tree=TreeMem[tree].children[vec[seq_pos+j]];
1925 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1926 if (weights_in_tree)
1927 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
1929 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
1934 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1937 if (weights_in_tree)
1938 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1940 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1966 for (int32_t j=0; seq_pos+j<len; j++)
1968 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1970 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1972 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1973 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq)
1974 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1976 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1978 if (weights_in_tree)
1979 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
1981 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
1987 tree=TreeMem[tree].children[vec[seq_pos+j]];
1988 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
1989 if (weights_in_tree)
1990 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
1992 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
1997 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq)
2000 if (weights_in_tree)
2001 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
2003 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
2011 template <
class Trie>
2013 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
2018 if (weights_in_tree || depth==0)
2019 value+=tree->weight;
2022 w=weights[degree-1];
2023 value+=weights[depth-1]*tree->weight;
2026 if (degree-1==depth)
2028 for (int32_t sym=0; sym<4; sym++)
2033 ConsensusEntry entry;
2035 entry.score=value+v;
2036 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
2044 for (int32_t sym=0; sym<4; sym++)
2046 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
2047 if (tree->children[sym] != NO_CHILD)
2048 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
2053 template <
class Trie>
2055 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights)
2061 for (int32_t i=pos; i<pos+deg && i<length; i++)
2064 Trie* tree = &TreeMem[trees[i]];
2066 for (int32_t d=0; d<deg-i+pos; d++)
2070 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
2073 if (!weights_in_tree)
2076 ASSERT(tree->children[sym] != NO_CHILD)
2077 tree=&TreeMem[tree->children[sym]];
2078 result+=w*tree->weight;
2085 template <
class Trie>
2090 ASSERT(pos>=0 && pos<length)
2091 ASSERT(!use_compact_terminal_nodes)
2093 Trie* t = &TreeMem[trees[pos]];
2095 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
2101 for (int32_t i=0; i<num_cur; i++)
2104 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
2117 for (int32_t i=0; i<num_cur; i++)
2120 uint64_t str_cur= cur->
get_element(i).string >> 2;
2126 for (int32_t j=0; j<num_prev; j++)
2130 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
2131 uint64_t str_prev= mask & prev->
get_element(j).string;
2134 if (str_cur == str_prev)
2137 if (bt==-1 || sc>max_score)
2148 ConsensusEntry entry;
2150 entry.score=max_score;
2158 #endif // _TRIE_H___