SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
Trie.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg, Gunnar Raetsch
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
13 #include <shogun/lib/Trie.h>
14 #include <string.h>
15 
16 namespace shogun
17 {
18 template <>
20  const int32_t nodeIdx, const int32_t depth, const int32_t offset,
21  const int32_t y0, float64_t* const* const W, const int32_t K )
22 {
23  ASSERT(nodeIdx!=NO_CHILD)
24  ASSERT(depth<K)
25  float64_t* const W_kiy = & W[ depth ][ offset + y0 ];
26  POIMTrie* const node = &TreeMem[ nodeIdx ];
27  int32_t sym;
28 
29  if( depth < degree-1 )
30  {
31  const int32_t offset1 = offset * NUM_SYMS;
32  for( sym = 0; sym < NUM_SYMS; ++sym )
33  {
34  ASSERT(W_kiy[sym]==0)
35  const int32_t childIdx = node->children[ sym ];
36  if( childIdx != NO_CHILD )
37  {
38  W_kiy[ sym ] = TreeMem[ childIdx ].weight;
39 
40  if (depth < K-1)
41  {
42  const int32_t y1 = ( y0 + sym ) * NUM_SYMS;
43  POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K );
44  }
45  }
46  }
47  }
48  else
49  {
50  ASSERT(depth==degree-1)
51  for( sym = 0; sym < NUM_SYMS; ++sym )
52  {
53  ASSERT(W_kiy[sym]==0)
54  W_kiy[ sym ] = node->child_weights[ sym ];
55  }
56  }
57 }
58 
59 template <>
61  float64_t* const* const W, const int32_t K)
62 {
63  ASSERT(degree>=1)
64  ASSERT(K>=1)
65  const int32_t N = length;
66  int32_t i;
67  for( i = 0; i < N; ++i ) {
68  //SG_PRINT("W_helper( %d )\n", i )
69  POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K );
70  }
71 }
72 
73 template <>
75  const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
76  int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym,
77  float64_t* S, float64_t* L, float64_t* R )
78 {
79  ASSERT(depth==degree-1)
80  ASSERT(nodeIdx!=NO_CHILD)
81 
82  const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ];
83  POIMTrie* const node = &TreeMem[ nodeIdx ];
84  int32_t symRight;
85  int32_t symLeft;
86 
87  // --- init
88  node->S = 0;
89  node->L = 0;
90  node->R = 0;
91 
92  if (i+depth<length)
93  {
94  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
95 
96  // --- go thru direct children
97  for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
98  const float64_t w1 = node->child_weights[ symRight ];
99  const float64_t pRight = distribRight[ symRight ];
100  const float64_t incr1 = pRight * w1;
101  node->S += incr1;
102  node->R += incr1;
103  }
104  }
105 
106  // --- collect precalced values from left neighbor tree
107  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
108  {
109  if (left_tries_idx[symLeft] != NO_CHILD)
110  {
111  POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
112 
113  ASSERT(nodeLeft)
114  const float64_t w2 = nodeLeft->child_weights[ lastSym ];
115  const float64_t pLeft = distribLeft[ symLeft ];
116  const float64_t incr2 = pLeft * w2;
117  node->S += incr2;
118  node->L += incr2;
119  }
120  }
121 
122  // --- add w and return results
123  const float64_t w0 = node->weight;
124  node->S += w0;
125  node->L += w0;
126  node->R += w0;
127  *S = node->S;
128  *L = node->L;
129  *R = node->R;
130 }
131 
132 
133 template <>
135  const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
136  int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L,
137  float64_t* R )
138 {
139  ASSERT(0<=depth && depth<=degree-2)
140  ASSERT(nodeIdx!=NO_CHILD)
141 
142  const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ];
143  POIMTrie* const node = &TreeMem[ nodeIdx ];
144  float64_t dummy;
145  float64_t auxS;
146  float64_t auxR;
147  int32_t symRight;
148  int32_t symLeft;
149 
150  // --- init
151  node->S = 0;
152  node->L = 0;
153  node->R = 0;
154 
155  // --- recurse thru direct children
156  for( symRight = 0; symRight < NUM_SYMS; ++symRight )
157  {
158  const int32_t childIdx = node->children[ symRight ];
159  if( childIdx != NO_CHILD )
160  {
161  if( depth < degree-2 )
162  {
163  int32_t new_left_tries_idx[4];
164 
165  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
166  {
167  new_left_tries_idx[symLeft]=NO_CHILD;
168 
169  if (left_tries_idx[symLeft] != NO_CHILD)
170  {
171  POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
172  ASSERT(nodeLeft)
173  new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ];
174  }
175  }
176  POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR );
177  }
178  else
179  POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR );
180 
181  if (i+depth<length)
182  {
183  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
184  const float64_t pRight = distribRight[ symRight ];
185  node->S += pRight * auxS;
186  node->R += pRight * auxR;
187  }
188  }
189  }
190 
191  // --- collect precalced values from left neighbor tree
192  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
193  {
194  if (left_tries_idx[symLeft] != NO_CHILD)
195  {
196  const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
197  ASSERT(nodeLeft)
198  const float64_t pLeft = distribLeft[ symLeft ];
199 
200  node->S += pLeft * nodeLeft->S;
201  node->L += pLeft * nodeLeft->L;
202 
203  if (i+depth<length)
204  {
205  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
206  // - second order correction for S
207  auxS = 0;
208  if( depth < degree-2 )
209  {
210  for( symRight = 0; symRight < NUM_SYMS; ++symRight )
211  {
212  const int32_t childIdxLeft = nodeLeft->children[ symRight ];
213  if( childIdxLeft != NO_CHILD )
214  {
215  const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ];
216  auxS += distribRight[symRight] * childLeft->S;
217  }
218  }
219  }
220  else
221  {
222  for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
223  auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ];
224  }
225  }
226  node->S -= pLeft* auxS;
227  }
228  }
229  }
230 
231  // --- add w and return results
232  const float64_t w0 = node->weight;
233  //SG_PRINT(" d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 )
234  node->S += w0;
235  node->L += w0;
236  node->R += w0;
237  *S = node->S;
238  *L = node->L;
239  *R = node->R;
240 }
241 
242 
243 
244 template <>
245 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib )
246 {
247  if( degree == 1 ) {
248  return;
249  }
250 
251  ASSERT(degree>=2)
252  const int32_t N = length;
253  float64_t dummy;
254  int32_t symLeft;
255  int32_t leftSubtrees[4];
256  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
257  leftSubtrees[ symLeft ] = NO_CHILD;
258 
259  for(int32_t i = 0; i < N; ++i )
260  {
261  POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy );
262 
263  const POIMTrie* const node = &TreeMem[ trees[i] ];
264  ASSERT(trees[i]!=NO_CHILD)
265 
266  for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
267  leftSubtrees[ symLeft ] = node->children[ symLeft ];
268  }
269 }
270 
271 template <>
273  const int32_t parentIdx, const int32_t sym, const int32_t depth,
274  float64_t* S, float64_t* L, float64_t* R )
275 {
276  ASSERT(parentIdx!=NO_CHILD)
277  const POIMTrie* const parent = &TreeMem[ parentIdx ];
278  if( depth < degree ) {
279  const int32_t nodeIdx = parent->children[ sym ];
280  const POIMTrie* const node = &TreeMem[ nodeIdx ];
281  *S = node->S;
282  *L = node->L;
283  *R = node->R;
284  } else {
285  ASSERT(depth==degree)
286  const float64_t w = parent->child_weights[ sym ];
287  *S = w;
288  *L = w;
289  *R = w;
290  }
291 }
292 
293 template <>
295  float64_t* const* const poims, const int32_t K, const int32_t k,
296  const int32_t i, const int32_t y, const float64_t valW,
297  const float64_t valS, const float64_t valL, const float64_t valR,
298  const int32_t debug)
299 {
300  //SG_PRINT("i=%d, d=%d, y=%d: w=%.3f \n", i, k, y, valW )
301  const int32_t nk = nofsKmers[ k ];
302  ASSERT(1<=k && k<=K)
303  ASSERT(0<=y && y<nk)
304  int32_t z;
305  int32_t j;
306 
307  // --- add superstring score; subtract "w", as it was counted twice
308  if( debug==0 || debug==2 )
309  {
310  poims[ k-1 ][ i*nk + y ] += valS - valW;
311  }
312 
313  // --- left partial overlaps
314  if( debug==0 || debug==3 )
315  {
316  int32_t r;
317  for( r = 1; k+r <= K; ++r )
318  {
319  const int32_t nr = nofsKmers[ r ];
320  const int32_t nz = nofsKmers[ k+r ];
321  float64_t* const poim = & poims[ k+r-1 ][ i*nz ];
322  z = y * nr;
323  for( j = 0; j < nr; ++j )
324  {
325  if( !( 0 <= z && z < nz ) ) {
326  SG_PRINT("k=%d, nk=%d, r=%d, nr=%d, nz=%d \n", k, nk, r, nr, nz )
327  SG_PRINT(" j=%d, y=%d, z=%d \n", j, y, z )
328  }
329  ASSERT(0<=z && z<nz)
330  poim[ z ] += valL - valW;
331  ++z;
332  }
333  }
334  }
335  // --- right partial overlaps
336  if( debug==0 || debug==4 )
337  {
338  int32_t l;
339  for( l = 1; k+l <= K && l <= i; ++l )
340  {
341  const int32_t nl = nofsKmers[ l ];
342  const int32_t nz = nofsKmers[ k+l ];
343  float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ];
344  z = y;
345  for( j = 0; j < nl; ++j ) {
346  ASSERT(0<=z && z<nz)
347  poim[ z ] += valR - valW;
348  z += nk;
349  }
350  }
351  }
352 }
353 
354 template <>
356  const int32_t nodeIdx, const int32_t depth, const int32_t i,
357  const int32_t y0, float64_t* const* const poims, const int32_t K,
358  const int32_t debug)
359 {
360  ASSERT(nodeIdx!=NO_CHILD)
361  ASSERT(depth<K)
362  POIMTrie* const node = &TreeMem[ nodeIdx ];
363  int32_t sym;
364  if( depth < degree-1 )
365  {
366  if( depth < K-1 )
367  {
368  for( sym = 0; sym < NUM_SYMS; ++sym )
369  {
370  const int32_t childIdx = node->children[ sym ];
371  if( childIdx != NO_CHILD )
372  {
373  POIMTrie* const child = &TreeMem[ childIdx ];
374  const int32_t y = y0 + sym;
375  const int32_t y1 = y * NUM_SYMS;
376  const float64_t w = child->weight;
377  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
378  POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug );
379  }
380  }
381  }
382  else
383  {
384  ASSERT(depth==K-1)
385  for( sym = 0; sym < NUM_SYMS; ++sym )
386  {
387  const int32_t childIdx = node->children[ sym ];
388  if( childIdx != NO_CHILD ) {
389  POIMTrie* const child = &TreeMem[ childIdx ];
390  const int32_t y = y0 + sym;
391  const float64_t w = child->weight;
392  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
393  }
394  }
395  }
396  }
397  else
398  {
399  ASSERT(depth==degree-1)
400  for( sym = 0; sym < NUM_SYMS; ++sym )
401  {
402  const float64_t w = node->child_weights[ sym ];
403  const int32_t y = y0 + sym;
404  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug );
405  }
406  }
407 }
408 
409 
410 
411 template <>
413  float64_t* const* const poims, const int32_t K, const int32_t debug)
414 {
415  ASSERT(degree>=1)
416  ASSERT(K>=1)
417  {
418  const int32_t m = ( degree > K ) ? degree : K;
419  nofsKmers = SG_MALLOC(int32_t, m+1 );
420  int32_t n;
421  int32_t k;
422  n = 1;
423  for( k = 0; k < m+1; ++k ) {
424  nofsKmers[ k ] = n;
425  n *= NUM_SYMS;
426  }
427  }
428  const int32_t N = length;
429  int32_t i;
430  for( i = 0; i < N; ++i ) {
431  POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug );
432  }
433  SG_FREE(nofsKmers);
434 }
435 }
node< P > * children
Definition: JLCoverTree.h:54
Template class Trie implements a suffix trie, i.e. a tree in which all suffixes up to a certain lengt...
Definition: Trie.h:136
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
void POIMs_extract_W_helper(const int32_t nodeIdx, const int32_t depth, const int32_t offset, const int32_t y0, float64_t *const *const W, const int32_t K)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18

SHOGUN Machine Learning Toolbox - Documentation