SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Trie.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg, Gunnar Raetsch
8  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
12 #include <shogun/io/SGIO.h>
13 #include <shogun/lib/Trie.h>
14 
15 namespace shogun
16 {
17 template <>
19  const int32_t nodeIdx, const int32_t depth, const int32_t offset,
20  const int32_t y0, float64_t* const* const W, const int32_t K )
21 {
22  ASSERT(nodeIdx!=NO_CHILD)
23  ASSERT(depth<K)
24  float64_t* const W_kiy = & W[ depth ][ offset + y0 ];
25  POIMTrie* const node = &TreeMem[ nodeIdx ];
26  int32_t sym;
27 
28  if( depth < degree-1 )
29  {
30  const int32_t offset1 = offset * NUM_SYMS;
31  for( sym = 0; sym < NUM_SYMS; ++sym )
32  {
33  ASSERT(W_kiy[sym]==0)
34  const int32_t childIdx = node->children[ sym ];
35  if( childIdx != NO_CHILD )
36  {
37  W_kiy[ sym ] = TreeMem[ childIdx ].weight;
38 
39  if (depth < K-1)
40  {
41  const int32_t y1 = ( y0 + sym ) * NUM_SYMS;
42  POIMs_extract_W_helper( childIdx, depth+1, offset1, y1, W, K );
43  }
44  }
45  }
46  }
47  else
48  {
49  ASSERT(depth==degree-1)
50  for( sym = 0; sym < NUM_SYMS; ++sym )
51  {
52  ASSERT(W_kiy[sym]==0)
53  W_kiy[ sym ] = node->child_weights[ sym ];
54  }
55  }
56 }
57 
58 template <>
60  float64_t* const* const W, const int32_t K)
61 {
62  ASSERT(degree>=1)
63  ASSERT(K>=1)
64  const int32_t N = length;
65  int32_t i;
66  for( i = 0; i < N; ++i ) {
67  //SG_PRINT("W_helper( %d )\n", i )
68  POIMs_extract_W_helper( trees[i], 0, i*NUM_SYMS, 0*NUM_SYMS, W, K );
69  }
70 }
71 
72 template <>
74  const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
75  int32_t left_tries_idx[4], const int32_t depth, int32_t const lastSym,
76  float64_t* S, float64_t* L, float64_t* R )
77 {
78  ASSERT(depth==degree-1)
79  ASSERT(nodeIdx!=NO_CHILD)
80 
81  const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ];
82  POIMTrie* const node = &TreeMem[ nodeIdx ];
83  int32_t symRight;
84  int32_t symLeft;
85 
86  // --- init
87  node->S = 0;
88  node->L = 0;
89  node->R = 0;
90 
91  if (i+depth<length)
92  {
93  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
94 
95  // --- go thru direct children
96  for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
97  const float64_t w1 = node->child_weights[ symRight ];
98  const float64_t pRight = distribRight[ symRight ];
99  const float64_t incr1 = pRight * w1;
100  node->S += incr1;
101  node->R += incr1;
102  }
103  }
104 
105  // --- collect precalced values from left neighbor tree
106  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
107  {
108  if (left_tries_idx[symLeft] != NO_CHILD)
109  {
110  POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
111 
112  ASSERT(nodeLeft)
113  const float64_t w2 = nodeLeft->child_weights[ lastSym ];
114  const float64_t pLeft = distribLeft[ symLeft ];
115  const float64_t incr2 = pLeft * w2;
116  node->S += incr2;
117  node->L += incr2;
118  }
119  }
120 
121  // --- add w and return results
122  const float64_t w0 = node->weight;
123  node->S += w0;
124  node->L += w0;
125  node->R += w0;
126  *S = node->S;
127  *L = node->L;
128  *R = node->R;
129 }
130 
131 
132 template <>
134  const float64_t* const distrib, const int32_t i, const int32_t nodeIdx,
135  int32_t left_tries_idx[4], const int32_t depth, float64_t* S, float64_t* L,
136  float64_t* R )
137 {
138  ASSERT(0<=depth && depth<=degree-2)
139  ASSERT(nodeIdx!=NO_CHILD)
140 
141  const float64_t* const distribLeft = & distrib[ (i-1) * NUM_SYMS ];
142  POIMTrie* const node = &TreeMem[ nodeIdx ];
143  float64_t dummy;
144  float64_t auxS;
145  float64_t auxR;
146  int32_t symRight;
147  int32_t symLeft;
148 
149  // --- init
150  node->S = 0;
151  node->L = 0;
152  node->R = 0;
153 
154  // --- recurse thru direct children
155  for( symRight = 0; symRight < NUM_SYMS; ++symRight )
156  {
157  const int32_t childIdx = node->children[ symRight ];
158  if( childIdx != NO_CHILD )
159  {
160  if( depth < degree-2 )
161  {
162  int32_t new_left_tries_idx[4];
163 
164  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
165  {
166  new_left_tries_idx[symLeft]=NO_CHILD;
167 
168  if (left_tries_idx[symLeft] != NO_CHILD)
169  {
170  POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
171  ASSERT(nodeLeft)
172  new_left_tries_idx[ symLeft ]=nodeLeft->children[ symRight ];
173  }
174  }
175  POIMs_calc_SLR_helper2( distrib, i, childIdx, new_left_tries_idx, depth+1, &auxS, &dummy, &auxR );
176  }
177  else
178  POIMs_calc_SLR_helper1( distrib, i, childIdx, left_tries_idx, depth+1, symRight, &auxS, &dummy, &auxR );
179 
180  if (i+depth<length)
181  {
182  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
183  const float64_t pRight = distribRight[ symRight ];
184  node->S += pRight * auxS;
185  node->R += pRight * auxR;
186  }
187  }
188  }
189 
190  // --- collect precalced values from left neighbor tree
191  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
192  {
193  if (left_tries_idx[symLeft] != NO_CHILD)
194  {
195  const POIMTrie* nodeLeft = &TreeMem[left_tries_idx[symLeft]];
196  ASSERT(nodeLeft)
197  const float64_t pLeft = distribLeft[ symLeft ];
198 
199  node->S += pLeft * nodeLeft->S;
200  node->L += pLeft * nodeLeft->L;
201 
202  if (i+depth<length)
203  {
204  const float64_t* const distribRight = & distrib[ (i+depth) * NUM_SYMS ];
205  // - second order correction for S
206  auxS = 0;
207  if( depth < degree-2 )
208  {
209  for( symRight = 0; symRight < NUM_SYMS; ++symRight )
210  {
211  const int32_t childIdxLeft = nodeLeft->children[ symRight ];
212  if( childIdxLeft != NO_CHILD )
213  {
214  const POIMTrie* const childLeft = &TreeMem[ childIdxLeft ];
215  auxS += distribRight[symRight] * childLeft->S;
216  }
217  }
218  }
219  else
220  {
221  for( symRight = 0; symRight < NUM_SYMS; ++symRight ) {
222  auxS += distribRight[symRight] * nodeLeft->child_weights[ symRight ];
223  }
224  }
225  node->S -= pLeft* auxS;
226  }
227  }
228  }
229 
230  // --- add w and return results
231  const float64_t w0 = node->weight;
232  //SG_PRINT(" d=%d, node=%d, dS=%.3f, w=%.3f\n", depth, nodeIdx, node->S, w0 )
233  node->S += w0;
234  node->L += w0;
235  node->R += w0;
236  *S = node->S;
237  *L = node->L;
238  *R = node->R;
239 }
240 
241 
242 
243 template <>
244 void CTrie<POIMTrie>::POIMs_precalc_SLR( const float64_t* const distrib )
245 {
246  if( degree == 1 ) {
247  return;
248  }
249 
250  ASSERT(degree>=2)
251  const int32_t N = length;
252  float64_t dummy;
253  int32_t symLeft;
254  int32_t leftSubtrees[4];
255  for( symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
256  leftSubtrees[ symLeft ] = NO_CHILD;
257 
258  for(int32_t i = 0; i < N; ++i )
259  {
260  POIMs_calc_SLR_helper2( distrib, i, trees[i], leftSubtrees, 0, &dummy, &dummy, &dummy );
261 
262  const POIMTrie* const node = &TreeMem[ trees[i] ];
263  ASSERT(trees[i]!=NO_CHILD)
264 
265  for(symLeft = 0; symLeft < NUM_SYMS; ++symLeft )
266  leftSubtrees[ symLeft ] = node->children[ symLeft ];
267  }
268 }
269 
270 template <>
272  const int32_t parentIdx, const int32_t sym, const int32_t depth,
273  float64_t* S, float64_t* L, float64_t* R )
274 {
275  ASSERT(parentIdx!=NO_CHILD)
276  const POIMTrie* const parent = &TreeMem[ parentIdx ];
277  if( depth < degree ) {
278  const int32_t nodeIdx = parent->children[ sym ];
279  const POIMTrie* const node = &TreeMem[ nodeIdx ];
280  *S = node->S;
281  *L = node->L;
282  *R = node->R;
283  } else {
284  ASSERT(depth==degree)
285  const float64_t w = parent->child_weights[ sym ];
286  *S = w;
287  *L = w;
288  *R = w;
289  }
290 }
291 
292 template <>
294  float64_t* const* const poims, const int32_t K, const int32_t k,
295  const int32_t i, const int32_t y, const float64_t valW,
296  const float64_t valS, const float64_t valL, const float64_t valR,
297  const int32_t debug)
298 {
299  //SG_PRINT("i=%d, d=%d, y=%d: w=%.3f \n", i, k, y, valW )
300  const int32_t nk = nofsKmers[ k ];
301  ASSERT(1<=k && k<=K)
302  ASSERT(0<=y && y<nk)
303  int32_t z;
304  int32_t j;
305 
306  // --- add superstring score; subtract "w", as it was counted twice
307  if( debug==0 || debug==2 )
308  {
309  poims[ k-1 ][ i*nk + y ] += valS - valW;
310  }
311 
312  // --- left partial overlaps
313  if( debug==0 || debug==3 )
314  {
315  int32_t r;
316  for( r = 1; k+r <= K; ++r )
317  {
318  const int32_t nr = nofsKmers[ r ];
319  const int32_t nz = nofsKmers[ k+r ];
320  float64_t* const poim = & poims[ k+r-1 ][ i*nz ];
321  z = y * nr;
322  for( j = 0; j < nr; ++j )
323  {
324  if( !( 0 <= z && z < nz ) ) {
325  SG_PRINT("k=%d, nk=%d, r=%d, nr=%d, nz=%d \n", k, nk, r, nr, nz )
326  SG_PRINT(" j=%d, y=%d, z=%d \n", j, y, z )
327  }
328  ASSERT(0<=z && z<nz)
329  poim[ z ] += valL - valW;
330  ++z;
331  }
332  }
333  }
334  // --- right partial overlaps
335  if( debug==0 || debug==4 )
336  {
337  int32_t l;
338  for( l = 1; k+l <= K && l <= i; ++l )
339  {
340  const int32_t nl = nofsKmers[ l ];
341  const int32_t nz = nofsKmers[ k+l ];
342  float64_t* const poim = & poims[ k+l-1 ][ (i-l)*nz ];
343  z = y;
344  for( j = 0; j < nl; ++j ) {
345  ASSERT(0<=z && z<nz)
346  poim[ z ] += valR - valW;
347  z += nk;
348  }
349  }
350  }
351 }
352 
353 template <>
355  const int32_t nodeIdx, const int32_t depth, const int32_t i,
356  const int32_t y0, float64_t* const* const poims, const int32_t K,
357  const int32_t debug)
358 {
359  ASSERT(nodeIdx!=NO_CHILD)
360  ASSERT(depth<K)
361  POIMTrie* const node = &TreeMem[ nodeIdx ];
362  int32_t sym;
363  if( depth < degree-1 )
364  {
365  if( depth < K-1 )
366  {
367  for( sym = 0; sym < NUM_SYMS; ++sym )
368  {
369  const int32_t childIdx = node->children[ sym ];
370  if( childIdx != NO_CHILD )
371  {
372  POIMTrie* const child = &TreeMem[ childIdx ];
373  const int32_t y = y0 + sym;
374  const int32_t y1 = y * NUM_SYMS;
375  const float64_t w = child->weight;
376  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
377  POIMs_add_SLR_helper1( childIdx, depth+1, i, y1, poims, K, debug );
378  }
379  }
380  }
381  else
382  {
383  ASSERT(depth==K-1)
384  for( sym = 0; sym < NUM_SYMS; ++sym )
385  {
386  const int32_t childIdx = node->children[ sym ];
387  if( childIdx != NO_CHILD ) {
388  POIMTrie* const child = &TreeMem[ childIdx ];
389  const int32_t y = y0 + sym;
390  const float64_t w = child->weight;
391  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, child->S, child->L, child->R, debug );
392  }
393  }
394  }
395  }
396  else
397  {
398  ASSERT(depth==degree-1)
399  for( sym = 0; sym < NUM_SYMS; ++sym )
400  {
401  const float64_t w = node->child_weights[ sym ];
402  const int32_t y = y0 + sym;
403  POIMs_add_SLR_helper2( poims, K, depth+1, i, y, w, w, w, w, debug );
404  }
405  }
406 }
407 
408 
409 
410 template <>
412  float64_t* const* const poims, const int32_t K, const int32_t debug)
413 {
414  ASSERT(degree>=1)
415  ASSERT(K>=1)
416  {
417  const int32_t m = ( degree > K ) ? degree : K;
418  nofsKmers = SG_MALLOC(int32_t, m+1 );
419  int32_t n;
420  int32_t k;
421  n = 1;
422  for( k = 0; k < m+1; ++k ) {
423  nofsKmers[ k ] = n;
424  n *= NUM_SYMS;
425  }
426  }
427  const int32_t N = length;
428  int32_t i;
429  for( i = 0; i < N; ++i ) {
430  POIMs_add_SLR_helper1( trees[i], 0, i, 0*NUM_SYMS, poims, K, debug );
431  }
432  SG_FREE(nofsKmers);
433 }
434 }

SHOGUN Machine Learning Toolbox - Documentation