24 #ifndef DOXYGEN_SHOULD_SKIP_THIS
27 #define NO_CHILD ((int32_t)-1073741824)
29 #define WEIGHTS_IN_TRIE
32 #ifdef TRIE_CHECK_EVERYTHING
33 #define TRIE_ASSERT_EVERYTHING(x) ASSERT(x)
35 #define TRIE_ASSERT_EVERYTHING(x)
39 #define TRIE_ASSERT(x)
41 #define TRIE_TERMINAL_CHARACTER 7
59 #ifdef TRIE_CHECK_EVERYTHING
88 #ifdef TRIE_CHECK_EVERYTHING
106 struct TreeParseInfo {
133 #endif // DOXYGEN_SHOULD_SKIP_THIS
135 template <
class Trie>
class CTrie;
137 #define IGNORE_IN_CLASSLIST
168 CTrie(int32_t d,
bool p_use_compact_terminal_nodes=
true);
185 int32_t
node,
const CTrie & other, int32_t other_node);
200 bool find_node(int32_t node, int32_t * trace, int32_t &trace_len)
const;
209 int32_t start_node, int32_t &deepest_node)
const;
232 void create(int32_t len,
bool p_use_compact_terminal_nodes=
true);
239 void delete_trees(
bool p_use_compact_terminal_nodes=
true);
252 int32_t i, int32_t seq_offset, int32_t* vec,
float32_t alpha,
253 float64_t *weights,
bool degree_times_position_weights);
283 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
285 bool degree_times_position_weights) ;
302 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
304 int32_t mkl_stepsize,
float64_t * weights,
305 bool degree_times_position_weights);
322 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
323 int32_t max_degree, int32_t num_feat, int32_t num_sym,
324 int32_t sym_offset, int32_t offs,
float64_t* result);
339 int32_t tree, int32_t i,
float64_t alpha, int32_t *vec,
340 int32_t len_rem, int32_t degree_rec, int32_t mismatch_rec,
341 int32_t max_mismatch,
float64_t * weights);
353 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
354 const int32_t depth, int32_t*
const x,
const int32_t k);
367 const struct TreeParseInfo info,
const int32_t p, int32_t* x,
387 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights);
399 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
438 const int32_t parentIdx,
const int32_t sym,
const int32_t depth,
448 float64_t*
const*
const poims,
const int32_t K,
449 const int32_t debug);
466 bool p_use_compact_terminal_nodes)
500 for (int32_t q=0; q<4; q++)
501 TreeMem[ret].child_weights[q]=0.0;
505 for (int32_t q=0; q<4; q++)
506 TreeMem[ret].children[q]=NO_CHILD;
508 #ifdef TRIE_CHECK_EVERYTHING
510 TreeMem[ret].has_floats=false ;
521 SG_DEBUG(
"Extending TreeMem from %i to %i elements\n",
523 TreeMemPtrMax = (int32_t) ((
float64_t)TreeMemPtrMax*1.2);
555 const int32_t nodeIdx,
const int32_t depth,
const int32_t offset,
556 const int32_t y0,
float64_t*
const*
const W,
const int32_t K);
571 const float64_t*
const distrib,
const int32_t i,
572 const int32_t nodeIdx, int32_t left_tries_idx[4],
573 const int32_t depth, int32_t
const lastSym,
float64_t* S,
588 const float64_t*
const distrib,
const int32_t i,
589 const int32_t nodeIdx, int32_t left_tries_idx[4],
603 const int32_t nodeIdx,
const int32_t depth,
const int32_t i,
604 const int32_t y0,
float64_t*
const*
const poims,
const int32_t K,
605 const int32_t debug);
621 float64_t*
const*
const poims,
const int32_t K,
const int32_t k,
622 const int32_t i,
const int32_t y,
const float64_t valW,
624 const int32_t debug);
627 inline virtual const char*
get_name()
const {
return "Trie"; }
659 template <
class Trie>
661 :
CSGObject(), degree(0), position_weights(NULL),
662 use_compact_terminal_nodes(false),
663 weights_in_tree(true)
676 template <
class Trie>
678 :
CSGObject(), degree(d), position_weights(NULL),
679 use_compact_terminal_nodes(p_use_compact_terminal_nodes),
680 weights_in_tree(true)
692 template <
class Trie>
694 :
CSGObject(to_copy), degree(to_copy.degree), position_weights(NULL),
695 use_compact_terminal_nodes(to_copy.use_compact_terminal_nodes)
714 for (int32_t i=0; i<
length; i++)
720 template <
class Trie>
727 position_weights=NULL ;
736 position_weights=NULL ;
741 TreeMem =
SG_MALLOC(Trie, TreeMemPtrMax);
742 memcpy(TreeMem, to_copy.
TreeMem, TreeMemPtrMax*
sizeof(Trie)) ;
748 for (int32_t i=0; i<length; i++)
749 trees[i]=to_copy.
trees[i] ;
754 template <
class Trie>
756 int32_t start_node, int32_t& deepest_node)
const
758 #ifdef TRIE_CHECK_EVERYTHING
760 SG_DEBUG(
"start_node=%i\n", start_node) ;
762 if (start_node==NO_CHILD)
764 for (int32_t i=0; i<length; i++)
766 int32_t my_deepest_node ;
767 int32_t depth=find_deepest_node(i, my_deepest_node) ;
768 SG_DEBUG(
"start_node %i depth=%i\n", i, depth) ;
771 deepest_node=my_deepest_node ;
778 if (TreeMem[start_node].has_seq)
780 for (int32_t q=0; q<16; q++)
781 if (TreeMem[start_node].seq[q]!=TRIE_TERMINAL_CHARACTER)
783 deepest_node=start_node ;
786 if (TreeMem[start_node].has_floats)
788 deepest_node=start_node ;
792 for (int32_t q=0; q<4; q++)
794 int32_t my_deepest_node ;
795 if (TreeMem[start_node].children[q]==NO_CHILD)
797 int32_t depth=find_deepest_node(abs(TreeMem[start_node].children[q]), my_deepest_node) ;
800 deepest_node=my_deepest_node ;
811 template <
class Trie>
813 int32_t start_node, int32_t depth,
float64_t * weights)
819 if (start_node==NO_CHILD)
821 for (int32_t i=0; i<length; i++)
822 compact_nodes(i,1, weights) ;
830 TRIE_ASSERT_EVERYTHING(TreeMem[start_node].has_floats) ;
832 for (int32_t q=0; q<4; q++)
833 if (TreeMem[start_node].child_weights[q]!=0.0)
839 TRIE_ASSERT_EVERYTHING(!TreeMem[start_node].has_floats) ;
841 int32_t num_used = 0 ;
844 for (int32_t q=0; q<4; q++)
846 if (TreeMem[start_node].children[q]==NO_CHILD)
855 for (int32_t q=0; q<4; q++)
857 if (TreeMem[start_node].children[q]==NO_CHILD)
859 int32_t num=compact_nodes(abs(TreeMem[start_node].children[q]), depth+1, weights) ;
862 int32_t
node=get_node() ;
864 int32_t last_node=TreeMem[start_node].children[q] ;
867 ASSERT(weights[depth]!=0.0) ;
868 TreeMem[node].weight=TreeMem[last_node].weight/weights[depth] ;
871 TreeMem[node].weight=TreeMem[last_node].weight ;
873 #ifdef TRIE_CHECK_EVERYTHING
874 TreeMem[node].has_seq=true ;
876 memset(TreeMem[node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
877 for (int32_t n=0; n<num; n++)
879 ASSERT(depth+n+1<=degree-1) ;
880 ASSERT(last_node!=NO_CHILD) ;
881 if (depth+n+1==degree-1)
883 TRIE_ASSERT_EVERYTHING(TreeMem[last_node].has_floats) ;
886 if (TreeMem[last_node].child_weights[k]!=0.0)
890 TreeMem[node].seq[n]=k ;
895 TRIE_ASSERT_EVERYTHING(!TreeMem[last_node].has_floats) ;
898 if (TreeMem[last_node].children[k]!=NO_CHILD)
902 TreeMem[node].seq[n]=k ;
903 last_node=TreeMem[last_node].children[k] ;
906 TreeMem[start_node].children[q]=-node ;
913 ret=compact_nodes(abs(TreeMem[start_node].children[q_used]), depth+1, weights) ;
920 template <
class Trie>
924 SG_DEBUG(
"checking nodes %i and %i\n", node, other_node) ;
925 if (fabs(TreeMem[node].weight-other.
TreeMem[other_node].weight)>=1e-5)
927 SG_DEBUG(
"CTrie::compare: TreeMem[%i].weight=%f!=other.TreeMem[%i].weight=%f\n", node, TreeMem[node].weight, other_node,other.
TreeMem[other_node].weight) ;
928 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
930 SG_DEBUG(
"============================================================\n") ;
932 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
936 #ifdef TRIE_CHECK_EVERYTHING
937 if (TreeMem[node].has_seq!=other.
TreeMem[other_node].has_seq)
939 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_seq=%i!=other.TreeMem[%i].has_seq=%i\n", node, TreeMem[node].has_seq, other_node,other.
TreeMem[other_node].has_seq) ;
940 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
942 SG_DEBUG(
"============================================================\n") ;
944 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
947 if (TreeMem[node].has_floats!=other.
TreeMem[other_node].has_floats)
949 SG_DEBUG(
"CTrie::compare: TreeMem[%i].has_floats=%i!=other.TreeMem[%i].has_floats=%i\n", node, TreeMem[node].has_floats, other_node, other.
TreeMem[other_node].has_floats) ;
952 if (other.
TreeMem[other_node].has_floats)
954 for (int32_t q=0; q<4; q++)
955 if (fabs(TreeMem[node].child_weights[q]-other.
TreeMem[other_node].child_weights[q])>1e-5)
957 SG_DEBUG(
"CTrie::compare: TreeMem[%i].child_weights[%i]=%e!=other.TreeMem[%i].child_weights[%i]=%e\n", node, q,TreeMem[node].child_weights[q], other_node,q,other.
TreeMem[other_node].child_weights[q]) ;
958 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
960 SG_DEBUG(
"============================================================\n") ;
962 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
966 if (other.
TreeMem[other_node].has_seq)
968 for (int32_t q=0; q<16; q++)
969 if ((TreeMem[node].seq[q]!=other.
TreeMem[other_node].seq[q]) && ((TreeMem[node].seq[q]<4)||(other.
TreeMem[other_node].seq[q]<4)))
971 SG_DEBUG(
"CTrie::compare: TreeMem[%i].seq[%i]=%i!=other.TreeMem[%i].seq[%i]=%i\n", node,q,TreeMem[node].seq[q], other_node,q,other.
TreeMem[other_node].seq[q]) ;
972 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
974 SG_DEBUG(
"============================================================\n") ;
976 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
980 if (!other.
TreeMem[other_node].has_seq && !other.
TreeMem[other_node].has_floats)
982 for (int32_t q=0; q<4; q++)
984 if ((TreeMem[node].children[q]==NO_CHILD) && (other.
TreeMem[other_node].children[q]==NO_CHILD))
986 if ((TreeMem[node].children[q]==NO_CHILD)!=(other.
TreeMem[other_node].children[q]==NO_CHILD))
988 SG_DEBUG(
"CTrie::compare: TreeMem[%i].children[%i]=%i!=other.TreeMem[%i].children[%i]=%i\n", node,q,TreeMem[node].children[q], other_node,q,other.
TreeMem[other_node].children[q]) ;
989 SG_DEBUG(
">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n") ;
991 SG_DEBUG(
"============================================================\n") ;
993 SG_DEBUG(
"<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<\n") ;
996 if (!compare_traverse(abs(TreeMem[node].children[q]), other, abs(other.
TreeMem[other_node].children[q])))
1007 template <
class Trie>
1011 for (int32_t i=0; i<length; i++)
1012 if (!compare_traverse(trees[i], other, other.
trees[i]))
1015 SG_DEBUG(
"two tries at %i identical\n", i) ;
1020 template <
class Trie>
1022 int32_t
node, int32_t * trace, int32_t& trace_len)
const
1024 #ifdef TRIE_CHECK_EVERYTHING
1026 ASSERT((trace[trace_len-1]>=0) && (trace[trace_len-1]<TreeMemPtrMax))
1027 if (TreeMem[trace[trace_len-1]].has_seq)
1029 if (TreeMem[trace[trace_len-1]].has_floats)
1032 for (int32_t q=0; q<4; q++)
1034 if (TreeMem[trace[trace_len-1]].children[q]==NO_CHILD)
1036 int32_t tl=trace_len+1 ;
1037 if (TreeMem[trace[trace_len-1]].children[q]>=0)
1038 trace[trace_len]=TreeMem[trace[trace_len-1]].children[q] ;
1040 trace[trace_len]=-TreeMem[trace[trace_len-1]].children[q] ;
1042 if (trace[trace_len]==node)
1047 if (find_node(node, trace, tl))
1061 template <
class Trie>
1064 #ifdef TRIE_CHECK_EVERYTHING
1065 int32_t * trace=
SG_MALLOC(int32_t, 2*degree);
1066 int32_t trace_len=-1 ;
1067 bool found = false ;
1069 for (tree=0; tree<length; tree++)
1071 trace[0]=trees[tree] ;
1073 found=find_node(node, trace, trace_len) ;
1078 SG_PRINT(
"position %i trace: ", tree) ;
1080 for (int32_t i=0; i<trace_len-1; i++)
1083 for (int32_t q=0; q<4; q++)
1084 if (abs(TreeMem[trace[i]].children[q])==trace[i+1])
1090 char acgt[5]=
"ACGT" ;
1093 SG_PRINT(
"\nnode=%i\nweight=%f\nhas_seq=%i\nhas_floats=%i\n", node, TreeMem[node].weight, TreeMem[node].has_seq, TreeMem[node].has_floats) ;
1094 if (TreeMem[node].has_floats)
1096 for (int32_t q=0; q<4; q++)
1097 SG_PRINT(
"child_weighs[%i] = %f\n", q, TreeMem[node].child_weights[q]) ;
1099 if (TreeMem[node].has_seq)
1101 for (int32_t q=0; q<16; q++)
1102 SG_PRINT(
"seq[%i] = %i\n", q, TreeMem[node].seq[q]) ;
1104 if (!TreeMem[node].has_seq && !TreeMem[node].has_floats)
1106 for (int32_t q=0; q<4; q++)
1108 if (TreeMem[node].children[q]!=NO_CHILD)
1110 SG_PRINT(
"children[%i] = %i -> \n", q, TreeMem[node].children[q]) ;
1111 display_node(abs(TreeMem[node].children[q])) ;
1114 SG_PRINT(
"children[%i] = NO_CHILD -| \n", q, TreeMem[node].children[q]) ;
1138 for (int32_t i=0; i<length; i++)
1139 trees[i] = NO_CHILD;
1150 delete_trees(get_use_compact_terminal_nodes());
1155 int32_t len,
bool p_use_compact_terminal_nodes)
1161 for (int32_t i=0; i<len; i++)
1162 trees[i]=get_node(degree==1);
1165 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
1170 bool p_use_compact_terminal_nodes)
1176 for (int32_t i=0; i<length; i++)
1177 trees[i]=get_node(degree==1);
1179 use_compact_terminal_nodes=p_use_compact_terminal_nodes ;
1182 template <
class Trie>
1189 TRIE_ASSERT(tree>=0) ;
1191 if (depth==degree-2)
1193 ret+=(TreeMem[tree].weight) ;
1195 for (int32_t k=0; k<4; k++)
1196 ret+=(TreeMem[tree].child_weights[k]) ;
1201 ret+=(TreeMem[tree].weight) ;
1203 for (int32_t i=0; i<4; i++)
1204 if (TreeMem[tree].children[i]!=NO_CHILD)
1205 ret += compute_abs_weights_tree(TreeMem[tree].children[i], depth+1) ;
1211 template <
class Trie>
1215 for (int32_t i=0; i<length*4; i++)
1219 for (int32_t i=0; i<length; i++)
1221 TRIE_ASSERT(trees[i]!=NO_CHILD) ;
1222 for (int32_t k=0; k<4; k++)
1224 sum[i*4+k]=compute_abs_weights_tree(TreeMem[trees[i]].children[k], 0) ;
1231 template <
class Trie>
1233 int32_t tree, int32_t i,
float64_t alpha,
1234 int32_t *vec, int32_t len_rem,
1235 int32_t degree_rec, int32_t mismatch_rec,
1236 int32_t max_mismatch,
float64_t * weights)
1240 TRIE_ASSERT(tree!=NO_CHILD) ;
1242 if ((len_rem<=0) || (mismatch_rec>max_mismatch) || (degree_rec>degree))
1244 const int32_t other[4][3] = { {1,2,3},{0,2,3},{0,1,3},{0,1,2} } ;
1246 int32_t subtree = NO_CHILD ;
1248 if (degree_rec==degree-1)
1250 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
1251 if (weights_in_tree)
1252 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec];
1254 if (weights[degree_rec]!=0.0)
1255 TreeMem[tree].child_weights[vec[0]] += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
1256 if (mismatch_rec+1<=max_mismatch)
1257 for (int32_t o=0; o<3; o++)
1259 if (weights_in_tree)
1260 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
1262 if (weights[degree_rec]!=0.0)
1263 TreeMem[tree].child_weights[other[vec[0]][o]] += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
1269 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1270 if (TreeMem[tree].children[vec[0]]!=NO_CHILD)
1272 subtree=TreeMem[tree].children[vec[0]] ;
1273 if (weights_in_tree)
1274 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec];
1276 if (weights[degree_rec]!=0.0)
1277 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec];
1281 int32_t tmp = get_node(degree_rec==degree-2);
1283 TreeMem[tree].children[vec[0]]=tmp ;
1285 #ifdef TRIE_CHECK_EVERYTHING
1286 if (degree_rec==degree-2)
1287 TreeMem[subtree].has_floats=true ;
1289 if (weights_in_tree)
1290 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec] ;
1292 if (weights[degree_rec]!=0.0)
1293 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*mismatch_rec]/weights[degree_rec] ;
1295 TreeMem[subtree].weight = 0.0 ;
1297 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
1299 degree_rec+1, mismatch_rec, max_mismatch, weights) ;
1301 if (mismatch_rec+1<=max_mismatch)
1303 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1304 for (int32_t o=0; o<3; o++)
1306 int32_t ot = other[vec[0]][o] ;
1307 if (TreeMem[tree].children[ot]!=NO_CHILD)
1309 subtree=TreeMem[tree].children[ot] ;
1310 if (weights_in_tree)
1311 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)];
1313 if (weights[degree_rec]!=0.0)
1314 TreeMem[subtree].weight += alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec];
1318 int32_t tmp = get_node(degree_rec==degree-2);
1320 TreeMem[tree].children[ot]=tmp ;
1322 #ifdef TRIE_CHECK_EVERYTHING
1323 if (degree_rec==degree-2)
1324 TreeMem[subtree].has_floats=true ;
1327 if (weights_in_tree)
1328 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)] ;
1330 if (weights[degree_rec]!=0.0)
1331 TreeMem[subtree].weight = alpha*weights[degree_rec+degree*(mismatch_rec+1)]/weights[degree_rec] ;
1333 TreeMem[subtree].weight = 0.0 ;
1336 add_example_to_tree_mismatch_recursion(subtree, i, alpha,
1338 degree_rec+1, mismatch_rec+1, max_mismatch, weights) ;
1344 template <
class Trie>
1346 int32_t tree, int32_t i, int32_t j,
float64_t weight, int32_t d,
1347 int32_t max_degree, int32_t num_feat, int32_t num_sym, int32_t sym_offset,
1358 for (int32_t k=0; k<num_sym; k++)
1360 if (TreeMem[tree].children[k]!=NO_CHILD)
1362 int32_t child=TreeMem[tree].children[k];
1365 compute_scoring_helper(child, i, j+1, weight+decay*TreeMem[child].weight, d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1367 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[child].weight;
1371 compute_scoring_helper(child, i, j+1, 0.0, 0, max_degree, num_feat, num_sym, sym_offset, offs, result);
1375 else if (j==degree-1)
1377 for (int32_t k=0; k<num_sym; k++)
1380 if (d<max_degree-1 && i<num_feat-1)
1381 compute_scoring_helper(trees[i+1], i+1, 0, weight+decay*TreeMem[tree].child_weights[k], d+1, max_degree, num_feat, num_sym, sym_offset, num_sym*offs+k, result);
1383 result[sym_offset*(i+j-max_degree+1)+num_sym*offs+k] += weight+decay*TreeMem[tree].child_weights[k];
1389 template <
class Trie>
1391 int32_t tree,
const int32_t p,
struct TreeParseInfo info,
1392 const int32_t depth, int32_t*
const x,
const int32_t k)
1394 const int32_t num_sym = info.num_sym;
1395 const int32_t y0 = info.y0;
1396 const int32_t y1 = (k==0) ? 0 : y0 - ( (depth<k) ? 0 : info.nofsKmers[k-1] * x[depth-k] );
1403 ASSERT( depth < degree );
1407 for( sym=0; sym<num_sym; ++sym ) {
1408 const int32_t childNum = TreeMem[tree].children[ sym ];
1409 if( childNum != NO_CHILD ) {
1410 int32_t child = childNum ;
1412 info.substrs[depth+1] = y0 + sym;
1413 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1415 count( TreeMem[child].weight, depth, info, p, x, k );
1416 traverse( child, p, info, depth+1, x, k );
1421 else if( depth == degree-1 )
1423 for( sym=0; sym<num_sym; ++sym ) {
1424 const float64_t w = TreeMem[tree].child_weights[ sym ];
1427 info.substrs[depth+1] = y0 + sym;
1428 info.y0 = (k==0) ? 0 : (y1+sym)*num_sym;
1430 count( w, depth, info, p, x, k );
1439 template <
class Trie>
1441 const float64_t w,
const int32_t depth,
const struct TreeParseInfo info,
1442 const int32_t p, int32_t* x,
const int32_t k)
1444 ASSERT( fabs(w) < 1e10 );
1446 ASSERT( x[depth+1] < 0 );
1451 const int32_t nofKmers = info.nofsKmers[k];
1452 const float64_t margWeight = w * info.margFactors[ depth-k ];
1453 const int32_t m_a = depth - k + 1;
1454 const int32_t m_b = info.num_feat - p;
1455 const int32_t m = ( m_a < m_b ) ? m_a : m_b;
1457 const int32_t offset0 = nofKmers * p;
1459 register int32_t offset;
1461 for( i = 0; i < m; ++i ) {
1462 const int32_t y = info.substrs[i+k+1];
1463 info.C_k[ y + offset ] += margWeight;
1468 const int32_t offsR = info.substrs[k+1] + offset0;
1469 info.R_k[offsR] += margWeight;
1471 if( p+depth-k < info.num_feat ) {
1472 const int32_t offsL = info.substrs[depth+1] + nofKmers * (p+depth-k);
1473 info.L_k[offsL] += margWeight;
1493 template <
class Trie>
1495 int32_t i, int32_t seq_offset, int32_t * vec,
float32_t alpha,
1496 float64_t *weights,
bool degree_times_position_weights)
1498 int32_t tree = trees[i] ;
1501 int32_t max_depth = 0 ;
1503 if (degree_times_position_weights)
1504 weights_column = &weights[(i+seq_offset)*degree] ;
1506 weights_column = weights ;
1508 if (weights_in_tree)
1510 for (int32_t j=0; (j<degree) && (i+j<length); j++)
1518 for (int32_t j=0; (j<max_depth) && (i+j+seq_offset<length); j++)
1520 TRIE_ASSERT((vec[i+j+seq_offset]>=0) && (vec[i+j+seq_offset]<4)) ;
1521 if ((j<degree-1) && (TreeMem[tree].children[vec[i+j+seq_offset]]!=NO_CHILD))
1523 if (TreeMem[tree].children[vec[i+j+seq_offset]]<0)
1526 TRIE_ASSERT(j >= degree-16) ;
1528 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1529 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1530 int32_t
node = - TreeMem[tree].
children[vec[i+j+seq_offset]] ;
1532 TRIE_ASSERT((node>=0) && (node<=TreeMemPtrMax)) ;
1533 TRIE_ASSERT_EVERYTHING(TreeMem[node].has_seq) ;
1534 TRIE_ASSERT_EVERYTHING(!TreeMem[node].has_floats) ;
1537 int32_t mismatch_pos = -1 ;
1540 for (k=0; (j+k<max_depth) && (i+j+seq_offset+k<length); k++)
1542 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
1544 if ((TreeMem[node].seq[k]>=4) && (TreeMem[node].seq[k]!=TRIE_TERMINAL_CHARACTER))
1545 fprintf(stderr,
"+++i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[k]) ;
1546 TRIE_ASSERT((TreeMem[node].seq[k]<4) || (TreeMem[node].seq[k]==TRIE_TERMINAL_CHARACTER)) ;
1548 if (TreeMem[node].seq[k]!=vec[i+j+seq_offset+k])
1558 if (mismatch_pos==-1)
1560 TreeMem[node].weight+=alpha ;
1568 int32_t last_node=tree ;
1572 for (k=0; k<mismatch_pos; k++)
1574 TRIE_ASSERT((vec[i+j+seq_offset+k]>=0) && (vec[i+j+seq_offset+k]<4)) ;
1575 TRIE_ASSERT(vec[i+j+seq_offset+k]==TreeMem[node].seq[k]) ;
1577 int32_t tmp=get_node();
1578 TreeMem[last_node].children[vec[i+j+seq_offset+k]]=tmp ;
1580 if (weights_in_tree)
1581 TreeMem[last_node].weight = (TreeMem[node].weight+alpha)*weights_column[j+k] ;
1583 TreeMem[last_node].weight = (TreeMem[node].weight+alpha) ;
1584 TRIE_ASSERT(j+k!=degree-1) ;
1586 if ((TreeMem[node].seq[mismatch_pos]>=4) && (TreeMem[node].seq[mismatch_pos]!=TRIE_TERMINAL_CHARACTER))
1587 fprintf(stderr,
"**i=%i j=%i seq[%i]=%i\n", i, j, k, TreeMem[node].seq[mismatch_pos]) ;
1588 ASSERT((TreeMem[node].seq[mismatch_pos]<4) || (TreeMem[node].seq[mismatch_pos]==TRIE_TERMINAL_CHARACTER)) ;
1589 TRIE_ASSERT(vec[i+j+seq_offset+mismatch_pos]!=TreeMem[node].seq[mismatch_pos]) ;
1596 for (int32_t q=0; q<4; q++)
1597 TreeMem[last_node].child_weights[q]=0.0 ;
1598 if (weights_in_tree)
1600 if (TreeMem[node].seq[mismatch_pos]<4)
1601 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]+=TreeMem[node].weight*weights_column[degree-1] ;
1602 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] += alpha*weights_column[degree-1] ;
1606 if (TreeMem[node].seq[mismatch_pos]<4)
1607 TreeMem[last_node].child_weights[TreeMem[node].seq[mismatch_pos]]=TreeMem[node].weight ;
1608 TreeMem[last_node].child_weights[vec[i+j+seq_offset+k]] = alpha ;
1611 #ifdef TRIE_CHECK_EVERYTHING
1612 TreeMem[last_node].has_floats=true ;
1618 if (TreeMem[node].seq[mismatch_pos]<4)
1620 TreeMem[last_node].children[TreeMem[node].seq[mismatch_pos]] = -node ;
1623 for (int32_t q=0; q<16; q++)
1625 if ((j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length))
1626 TreeMem[node].seq[q] = TreeMem[node].seq[q+mismatch_pos] ;
1628 TreeMem[node].seq[q] = TRIE_TERMINAL_CHARACTER ;
1630 #ifdef TRIE_CHECK_EVERYTHING
1631 TreeMem[node].has_seq=true ;
1636 TRIE_ASSERT((vec[i+j+seq_offset+mismatch_pos]>=0) && (vec[i+j+seq_offset+mismatch_pos]<4)) ;
1637 int32_t tmp = get_node() ;
1638 TreeMem[last_node].children[vec[i+j+seq_offset+mismatch_pos]] = -tmp ;
1641 TreeMem[last_node].weight = alpha ;
1642 #ifdef TRIE_CHECK_EVERYTHING
1643 TreeMem[last_node].has_seq = true ;
1645 memset(TreeMem[last_node].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1646 for (int32_t q=0; (j+q+mismatch_pos<degree) && (i+j+seq_offset+q+mismatch_pos<length); q++)
1647 TreeMem[last_node].seq[q] = vec[i+j+seq_offset+mismatch_pos+q] ;
1654 tree=TreeMem[tree].children[vec[i+j+seq_offset]] ;
1655 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
1656 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1657 if (weights_in_tree)
1658 TreeMem[tree].weight += alpha*weights_column[j];
1660 TreeMem[tree].weight += alpha ;
1663 else if (j==degree-1)
1666 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1667 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
1668 if (weights_in_tree)
1669 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha*weights_column[j] ;
1671 TreeMem[tree].child_weights[vec[i+j+seq_offset]] += alpha;
1677 bool use_seq = use_compact_terminal_nodes && (j>degree-16) ;
1678 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1679 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1681 int32_t tmp = get_node((j==degree-2) && (!use_seq));
1683 TreeMem[tree].children[vec[i+j+seq_offset]] = -tmp ;
1685 TreeMem[tree].children[vec[i+j+seq_offset]] = tmp ;
1688 TRIE_ASSERT((tree>=0) && (tree<TreeMemPtrMax)) ;
1689 #ifdef TRIE_CHECK_EVERYTHING
1690 TreeMem[tree].has_seq = use_seq ;
1694 TreeMem[tree].weight = alpha ;
1696 memset(TreeMem[tree].seq, TRIE_TERMINAL_CHARACTER, 16) ;
1697 for (int32_t q=0; (j+q<degree) && (i+j+seq_offset+q<length); q++)
1700 TreeMem[tree].seq[q]=vec[i+j+seq_offset+q] ;
1706 if (weights_in_tree)
1707 TreeMem[tree].weight = alpha*weights_column[j] ;
1709 TreeMem[tree].weight = alpha ;
1710 #ifdef TRIE_CHECK_EVERYTHING
1712 TreeMem[tree].has_floats = true ;
1719 template <
class Trie>
1721 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1723 bool degree_times_position_weights)
1725 int32_t tree = trees[tree_pos] ;
1727 if ((position_weights!=NULL) && (position_weights[weight_pos]==0))
1731 if (degree_times_position_weights)
1732 weights_column=&weights[weight_pos*degree] ;
1734 weights_column=weights ;
1737 for (int32_t j=0; seq_pos+j < len; j++)
1739 TRIE_ASSERT((vec[seq_pos+j]<4) && (vec[seq_pos+j]>=0)) ;
1741 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1743 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1744 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1746 tree = - TreeMem[tree].children[vec[seq_pos+j]];
1747 TRIE_ASSERT(tree>=0) ;
1748 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
1750 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1752 TRIE_ASSERT((vec[seq_pos+j+k]<4) && (vec[seq_pos+j+k]>=0)) ;
1753 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1755 this_weight += weights_column[j+k] ;
1757 sum += TreeMem[tree].weight * this_weight ;
1762 tree=TreeMem[tree].children[vec[seq_pos+j]];
1763 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1764 if (weights_in_tree)
1765 sum += TreeMem[tree].weight ;
1767 sum += TreeMem[tree].weight * weights_column[j] ;
1772 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1775 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_floats) ;
1776 if (weights_in_tree)
1777 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1779 sum += TreeMem[tree].child_weights[vec[seq_pos+j]] * weights_column[j] ;
1782 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_floats) ;
1788 if (position_weights!=NULL)
1789 return sum*position_weights[weight_pos] ;
1794 template <
class Trie>
1796 int32_t* vec, int32_t len, int32_t seq_pos, int32_t tree_pos,
1798 int32_t mkl_stepsize,
float64_t * weights,
1799 bool degree_times_position_weights)
1801 int32_t tree = trees[tree_pos] ;
1805 if (position_weights!=NULL)
1807 factor *= position_weights[weight_pos] ;
1810 if (!degree_times_position_weights)
1812 for (int32_t j=0; seq_pos+j<len; j++)
1814 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1816 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1818 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1819 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
1820 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1822 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1824 if (weights_in_tree)
1825 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1827 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
1833 tree=TreeMem[tree].children[vec[seq_pos+j]];
1834 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1835 if (weights_in_tree)
1836 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1838 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
1843 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1846 if (weights_in_tree)
1847 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1849 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1856 for (int32_t j=0; seq_pos+j<len; j++)
1858 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1860 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1862 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1863 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
1864 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1866 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1868 if (weights_in_tree)
1869 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1871 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
1877 tree=TreeMem[tree].children[vec[seq_pos+j]];
1878 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1879 if (weights_in_tree)
1880 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight ;
1882 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+weight_pos*degree] ;
1887 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1890 if (weights_in_tree)
1891 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1893 LevelContrib[weight_pos/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j+weight_pos*degree] ;
1900 else if (!degree_times_position_weights)
1902 for (int32_t j=0; seq_pos+j<len; j++)
1904 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1906 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1908 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1909 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
1910 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1912 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1914 if (weights_in_tree)
1915 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight ;
1917 LevelContrib[(j+k)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k] ;
1923 tree=TreeMem[tree].children[vec[seq_pos+j]];
1924 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1925 if (weights_in_tree)
1926 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight ;
1928 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j] ;
1933 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1936 if (weights_in_tree)
1937 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]] ;
1939 LevelContrib[j/mkl_stepsize] += factor*TreeMem[tree].child_weights[vec[seq_pos+j]]*weights[j] ;
1965 for (int32_t j=0; seq_pos+j<len; j++)
1967 if ((j<degree-1) && (TreeMem[tree].children[vec[seq_pos+j]]!=NO_CHILD))
1969 if (TreeMem[tree].children[vec[seq_pos+j]]<0)
1971 tree = -TreeMem[tree].children[vec[seq_pos+j]];
1972 TRIE_ASSERT_EVERYTHING(TreeMem[tree].has_seq) ;
1973 for (int32_t k=0; (j+k<degree) && (seq_pos+j+k<length); k++)
1975 if (TreeMem[tree].seq[k]!=vec[seq_pos+j+k])
1977 if (weights_in_tree)
1978 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight ;
1980 LevelContrib[(j+k+degree*weight_pos)/mkl_stepsize] += factor*TreeMem[tree].weight*weights[j+k+weight_pos*degree] ;
1986 tree=TreeMem[tree].children[vec[seq_pos+j]];
1987 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1988 if (weights_in_tree)
1989 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight ;
1991 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].weight * weights[j+weight_pos*degree] ;
1996 TRIE_ASSERT_EVERYTHING(!TreeMem[tree].has_seq) ;
1999 if (weights_in_tree)
2000 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] ;
2002 LevelContrib[(j+degree*weight_pos)/mkl_stepsize] += factor * TreeMem[tree].child_weights[vec[seq_pos+j]] * weights[j+weight_pos*degree] ;
2010 template <
class Trie>
2012 Trie* tree, int32_t depth, uint64_t seq,
float64_t value,
2017 if (weights_in_tree || depth==0)
2018 value+=tree->weight;
2021 w=weights[degree-1];
2022 value+=weights[depth-1]*tree->weight;
2025 if (degree-1==depth)
2027 for (int32_t sym=0; sym<4; sym++)
2032 ConsensusEntry entry;
2034 entry.score=value+v;
2035 entry.string=seq | ((uint64_t) sym) << (2*(degree-depth-1));
2043 for (int32_t sym=0; sym<4; sym++)
2045 uint64_t str=seq | ((uint64_t) sym) << (2*(degree-depth-1));
2046 if (tree->children[sym] != NO_CHILD)
2047 fill_backtracking_table_recursion(&TreeMem[tree->children[sym]], depth+1, str, value, table, weights);
2052 template <
class Trie>
2054 int32_t pos, uint64_t seq, int32_t deg,
float64_t* weights)
2060 for (int32_t i=pos; i<pos+deg && i<length; i++)
2063 Trie* tree = &TreeMem[trees[i]];
2065 for (int32_t d=0; d<deg-i+pos; d++)
2069 int32_t sym = (int32_t) (seq >> (2*(deg-1-d-i+pos)) & 3);
2072 if (!weights_in_tree)
2075 ASSERT(tree->children[sym] != NO_CHILD);
2076 tree=&TreeMem[tree->children[sym]];
2077 result+=w*tree->weight;
2084 template <
class Trie>
2089 ASSERT(pos>=0 && pos<length);
2090 ASSERT(!use_compact_terminal_nodes);
2092 Trie* t = &TreeMem[trees[pos]];
2094 fill_backtracking_table_recursion(t, 0, (uint64_t) 0, 0.0, cur, weights);
2100 for (int32_t i=0; i<num_cur; i++)
2103 entry.score+=get_cumulative_score(pos+1, entry.string, degree-1, weights);
2116 for (int32_t i=0; i<num_cur; i++)
2119 uint64_t str_cur= cur->
get_element(i).string >> 2;
2125 for (int32_t j=0; j<num_prev; j++)
2129 ((((uint64_t)0)-1) ^ (((uint64_t) 3) << (2*(degree-1))));
2130 uint64_t str_prev= mask & prev->
get_element(j).string;
2133 if (str_cur == str_prev)
2136 if (bt==-1 || sc>max_score)
2147 ConsensusEntry entry;
2149 entry.score=max_score;
2157 #endif // _TRIE_H___