SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RegulatoryModulesStringKernel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2009 Sebastian J. Schultheiss and Soeren Sonnenburg
8  * Copyright (C) 2009 Max-Planck-Society
9  */
10 
11 #include <shogun/lib/common.h>
14 #include <shogun/io/SGIO.h>
15 
16 using namespace shogun;
17 
19 : CStringKernel<char>(0)
20 {
21  init();
22 }
23 
25  int32_t size, float64_t w, int32_t d, int32_t s, int32_t wl)
26 : CStringKernel<char>(size)
27 {
28  init();
29 }
30 
33  float64_t w, int32_t d, int32_t s, int32_t wl, int32_t size)
34 : CStringKernel<char>(size)
35 {
36  init();
37  set_motif_positions(lpos, rpos);
38  init(lstr,rstr);
39 }
40 
42 {
45 }
46 
47 void CRegulatoryModulesStringKernel::init()
48 {
49  width=0;
50  degree=0;
51  shift=0;
52  window=0;
55 
56  SG_ADD(&width, "width", "the width of Gaussian kernel part", MS_AVAILABLE);
57  SG_ADD(&degree, "degree", "the degree of weighted degree kernel part",
58  MS_AVAILABLE);
59  SG_ADD(&shift, "shift",
60  "the shift of weighted degree with shifts kernel part", MS_AVAILABLE);
61  SG_ADD(&window, "window", "the size of window around motifs", MS_AVAILABLE);
62  SG_ADD((CSGObject**)&motif_positions_lhs, "motif_positions_lhs",
63  "the matrix of motif positions from sequences left-hand side", MS_NOT_AVAILABLE);
64  SG_ADD((CSGObject**)&motif_positions_rhs, "motif_positions_rhs",
65  "the matrix of motif positions from sequences right-hand side", MS_NOT_AVAILABLE);
66  SG_ADD(&position_weights, "position_weights", "scaling weights in window", MS_NOT_AVAILABLE);
67  SG_ADD(&weights, "weights", "weights of WD kernel", MS_NOT_AVAILABLE);
68 }
69 
70 bool CRegulatoryModulesStringKernel::init(CFeatures* l, CFeatures* r)
71 {
74 
76  SG_ERROR("Number of vectors does not agree (LHS: %d, Motif LHS: %d).\n",
79  SG_ERROR("Number of vectors does not agree (RHS: %d, Motif RHS: %d).\n",
81 
84  return init_normalizer();
85 }
86 
88  CDenseFeatures<uint16_t>* positions_lhs, CDenseFeatures<uint16_t>* positions_rhs)
89 {
90  ASSERT(positions_lhs)
91  ASSERT(positions_rhs)
94  if (positions_lhs->get_num_features() != positions_rhs->get_num_features())
95  SG_ERROR("Number of dimensions does not agree.\n")
96 
97  motif_positions_lhs=positions_lhs;
98  motif_positions_rhs=positions_rhs;
99  SG_REF(positions_lhs);
100  SG_REF(positions_rhs);
101 }
102 
104 {
107 
108  bool free_avec, free_bvec;
109  int32_t alen=0;
110  int32_t blen=0;
111  char* avec=((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
112  char* bvec=((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
113 
114  int32_t alen_pos, blen_pos;
115  bool afree_pos, bfree_pos;
116  uint16_t* positions_a = motif_positions_lhs->get_feature_vector(idx_a, alen_pos, afree_pos);
117  uint16_t* positions_b = motif_positions_rhs->get_feature_vector(idx_b, blen_pos, bfree_pos);
118  ASSERT(alen_pos==blen_pos)
119  int32_t num_pos=alen_pos;
120 
121 
122  float64_t result_rbf=0;
123  float64_t result_wds=0;
124 
125  for (int32_t p=0; p<num_pos; p++)
126  {
127  result_rbf+=CMath::sq(positions_a[p]-positions_b[p]);
128 
129  for (int32_t p2=0; p2<num_pos; p2++) //p+1 and below * 2
130  result_rbf+=CMath::sq( (positions_a[p]-positions_a[p2]) - (positions_b[p]-positions_b[p2]) );
131 
132  int32_t limit = window;
133  if (window + positions_a[p] > alen)
134  limit = alen - positions_a[p];
135 
136  if (window + positions_b[p] > blen)
137  limit = CMath::min(limit, blen - positions_b[p]);
138 
139  result_wds+=compute_wds(&avec[positions_a[p]], &bvec[positions_b[p]],
140  limit);
141  }
142 
143  float64_t result=exp(-result_rbf/width)+result_wds;
144 
145  ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
146  ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
147  ((CDenseFeatures<uint16_t>*) lhs)->free_feature_vector(positions_a, idx_a, afree_pos);
148  ((CDenseFeatures<uint16_t>*) rhs)->free_feature_vector(positions_b, idx_b, bfree_pos);
149 
150  return result;
151 }
152 
154  char* avec, char* bvec, int32_t len)
155 {
156  float64_t* max_shift_vec = SG_MALLOC(float64_t, shift);
157  float64_t sum0=0 ;
158  for (int32_t i=0; i<shift; i++)
159  max_shift_vec[i]=0 ;
160 
161  // no shift
162  for (int32_t i=0; i<len; i++)
163  {
164  if ((position_weights.vector!=NULL) && (position_weights[i]==0.0))
165  continue ;
166 
167  float64_t sumi = 0.0 ;
168  for (int32_t j=0; (j<degree) && (i+j<len); j++)
169  {
170  if (avec[i+j]!=bvec[i+j])
171  break ;
172  sumi += weights[j];
173  }
174  if (position_weights.vector!=NULL)
175  sum0 += position_weights[i]*sumi ;
176  else
177  sum0 += sumi ;
178  } ;
179 
180  for (int32_t i=0; i<len; i++)
181  {
182  for (int32_t k=1; (k<=shift) && (i+k<len); k++)
183  {
184  if ((position_weights.vector!=NULL) && (position_weights[i]==0.0) && (position_weights[i+k]==0.0))
185  continue ;
186 
187  float64_t sumi1 = 0.0 ;
188  // shift in sequence a
189  for (int32_t j=0; (j<degree) && (i+j+k<len); j++)
190  {
191  if (avec[i+j+k]!=bvec[i+j])
192  break ;
193  sumi1 += weights[j];
194  }
195  float64_t sumi2 = 0.0 ;
196  // shift in sequence b
197  for (int32_t j=0; (j<degree) && (i+j+k<len); j++)
198  {
199  if (avec[i+j]!=bvec[i+j+k])
200  break ;
201  sumi2 += weights[j];
202  }
203  if (position_weights.vector!=NULL)
204  max_shift_vec[k-1] += position_weights[i]*sumi1 + position_weights[i+k]*sumi2 ;
205  else
206  max_shift_vec[k-1] += sumi1 + sumi2 ;
207  } ;
208  }
209 
210  float64_t result = sum0 ;
211  for (int32_t i=0; i<shift; i++)
212  result += max_shift_vec[i]/(2*(i+1)) ;
213 
214  SG_FREE(max_shift_vec);
215  return result ;
216 }
217 
219 {
220  ASSERT(degree>0)
221 
223 
224  int32_t i;
225  float64_t sum=0;
226  for (i=0; i<degree; i++)
227  {
228  weights[i]=degree-i;
229  sum+=weights[i];
230  }
231 
232  for (i=0; i<degree; i++)
233  weights[i]/=sum;
234 }
235 

SHOGUN Machine Learning Toolbox - Documentation