SHOGUN 4.2.0
TOPFeatures.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 1999-2009 Soeren Sonnenburg
 * Written (W) 1999-2008 Gunnar Raetsch
 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/features/TOPFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

CTOPFeatures::CTOPFeatures()
{
    init();
}

CTOPFeatures::CTOPFeatures(
    int32_t size, CHMM* p, CHMM* n, bool neglin, bool poslin)
: CDenseFeatures<float64_t>(size)
{
    init();
    neglinear=neglin;
    poslinear=poslin;

    set_models(p,n);
}

CTOPFeatures::CTOPFeatures(const CTOPFeatures &orig)
: CDenseFeatures<float64_t>(orig)
{
    init();
    pos=orig.pos;
    neg=orig.neg;
    neglinear=orig.neglinear;
    poslinear=orig.poslinear;
}

CTOPFeatures::~CTOPFeatures()
{
    SG_FREE(pos_relevant_indizes.idx_p);
    SG_FREE(pos_relevant_indizes.idx_q);
    SG_FREE(pos_relevant_indizes.idx_a_cols);
    SG_FREE(pos_relevant_indizes.idx_a_rows);
    SG_FREE(pos_relevant_indizes.idx_b_cols);
    SG_FREE(pos_relevant_indizes.idx_b_rows);

    SG_FREE(neg_relevant_indizes.idx_p);
    SG_FREE(neg_relevant_indizes.idx_q);
    SG_FREE(neg_relevant_indizes.idx_a_cols);
    SG_FREE(neg_relevant_indizes.idx_a_rows);
    SG_FREE(neg_relevant_indizes.idx_b_cols);
    SG_FREE(neg_relevant_indizes.idx_b_rows);

    SG_UNREF(pos);
    SG_UNREF(neg);
}

void CTOPFeatures::set_models(CHMM* p, CHMM* n)
{
    ASSERT(p && n)
    SG_REF(p);
    SG_REF(n);

    pos=p;
    neg=n;
    set_num_vectors(0);

    free_feature_matrix();

    if (pos && pos->get_observations())
        set_num_vectors(pos->get_observations()->get_num_vectors());

    compute_relevant_indizes(p, &pos_relevant_indizes);
    compute_relevant_indizes(n, &neg_relevant_indizes);
    num_features=compute_num_features();

    SG_DEBUG("pos_feat=[%i,%i,%i,%i],neg_feat=[%i,%i,%i,%i] -> %i features\n", pos->get_N(), pos->get_N(), pos->get_N()*pos->get_N(), pos->get_N()*pos->get_M(), neg->get_N(), neg->get_N(), neg->get_N()*neg->get_N(), neg->get_N()*neg->get_M(), num_features)
}

float64_t* CTOPFeatures::compute_feature_vector(
    int32_t num, int32_t &len, float64_t* target)
{
    float64_t* featurevector=target;

    if (!featurevector)
        featurevector=SG_MALLOC(float64_t, get_num_features());

    if (!featurevector)
        return NULL;

    compute_feature_vector(featurevector, num, len);

    return featurevector;
}

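/* Layout of the TOP feature vector filled in below (the "TOP" kernel
 * features of Tsuda et al., 2002): entry 0 holds the posterior log-odds
 * log P(x|pos) - log P(x|neg); it is followed by d log P(x|pos)/d theta
 * for every relevant positive-model parameter, then by
 * -d log P(x|neg)/d theta for every relevant negative-model parameter.
 * The model_derivative_* methods return log dP/d theta, so each entry is
 * computed as exp(log-derivative - log-probability) = d log P/d theta.
 */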
void CTOPFeatures::compute_feature_vector(
    float64_t* featurevector, int32_t num, int32_t& len)
{
    int32_t i,j,p=0,x=num;
    int32_t idx=0;

    float64_t posx=(poslinear) ?
        (pos->linear_model_probability(x)) : (pos->model_probability(x));
    float64_t negx=(neglinear) ?
        (neg->linear_model_probability(x)) : (neg->model_probability(x));

    len=get_num_features();

    featurevector[p++]=(posx-negx);

    //first do positive model
    if (poslinear)
    {
        for (i=0; i<pos->get_N(); i++)
        {
            for (j=0; j<pos->get_M(); j++)
                featurevector[p++]=exp(pos->linear_model_derivative(i, j, x)-posx);
        }
    }
    else
    {
        for (idx=0; idx<pos_relevant_indizes.num_p; idx++)
            featurevector[p++]=exp(pos->model_derivative_p(pos_relevant_indizes.idx_p[idx], x)-posx);

        for (idx=0; idx<pos_relevant_indizes.num_q; idx++)
            featurevector[p++]=exp(pos->model_derivative_q(pos_relevant_indizes.idx_q[idx], x)-posx);

        for (idx=0; idx<pos_relevant_indizes.num_a; idx++)
            featurevector[p++]=exp(pos->model_derivative_a(pos_relevant_indizes.idx_a_rows[idx], pos_relevant_indizes.idx_a_cols[idx], x)-posx);

        for (idx=0; idx<pos_relevant_indizes.num_b; idx++)
            featurevector[p++]=exp(pos->model_derivative_b(pos_relevant_indizes.idx_b_rows[idx], pos_relevant_indizes.idx_b_cols[idx], x)-posx);

        //for (i=0; i<pos->get_N(); i++)
        //{
        //  featurevector[p++]=exp(pos->model_derivative_p(i, x)-posx);
        //  featurevector[p++]=exp(pos->model_derivative_q(i, x)-posx);

        //  for (j=0; j<pos->get_N(); j++)
        //      featurevector[p++]=exp(pos->model_derivative_a(i, j, x)-posx);

        //  for (j=0; j<pos->get_M(); j++)
        //      featurevector[p++]=exp(pos->model_derivative_b(i, j, x)-posx);
        //}
    }

    //then do negative
    if (neglinear)
    {
        for (i=0; i<neg->get_N(); i++)
        {
            for (j=0; j<neg->get_M(); j++)
                featurevector[p++]=-exp(neg->linear_model_derivative(i, j, x)-negx);
        }
    }
    else
    {
        for (idx=0; idx<neg_relevant_indizes.num_p; idx++)
            featurevector[p++]=-exp(neg->model_derivative_p(neg_relevant_indizes.idx_p[idx], x)-negx);

        for (idx=0; idx<neg_relevant_indizes.num_q; idx++)
            featurevector[p++]=-exp(neg->model_derivative_q(neg_relevant_indizes.idx_q[idx], x)-negx);

        for (idx=0; idx<neg_relevant_indizes.num_a; idx++)
            featurevector[p++]=-exp(neg->model_derivative_a(neg_relevant_indizes.idx_a_rows[idx], neg_relevant_indizes.idx_a_cols[idx], x)-negx);

        for (idx=0; idx<neg_relevant_indizes.num_b; idx++)
            featurevector[p++]=-exp(neg->model_derivative_b(neg_relevant_indizes.idx_b_rows[idx], neg_relevant_indizes.idx_b_cols[idx], x)-negx);

        //for (i=0; i<neg->get_N(); i++)
        //{
        //  featurevector[p++]= - exp(neg->model_derivative_p(i, x)-negx);
        //  featurevector[p++]= - exp(neg->model_derivative_q(i, x)-negx);

        //  for (j=0; j<neg->get_N(); j++)
        //      featurevector[p++]= - exp(neg->model_derivative_a(i, j, x)-negx);

        //  for (j=0; j<neg->get_M(); j++)
        //      featurevector[p++]= - exp(neg->model_derivative_b(i, j, x)-negx);
        //}
    }
}

float64_t* CTOPFeatures::set_feature_matrix()
{
    int32_t len=0;

    free_feature_matrix();

    ASSERT(pos)
    ASSERT(pos->get_observations())

    num_vectors=pos->get_observations()->get_num_vectors();
    SG_INFO("allocating top feature cache of size %.2fM\n", sizeof(float64_t)*num_features*num_vectors/1024.0/1024.0)
    feature_matrix=SGMatrix<float64_t>(num_features,num_vectors);
    if (!feature_matrix.matrix)
    {
        SG_ERROR("allocation not successful!")
        return NULL;
    }

    SG_INFO("calculating top feature matrix\n")

    for (int32_t x=0; x<num_vectors; x++)
    {
        if (!(x % (num_vectors/10+1)))
            SG_DEBUG("%02d%%.", (int) (100.0*x/num_vectors))
        else if (!(x % (num_vectors/200+1)))
            SG_DEBUG(".")

        compute_feature_vector(&feature_matrix[x*num_features], x, len);
    }

    SG_DONE()

    num_vectors=get_num_vectors();
    num_features=get_num_features();

    return feature_matrix.matrix;
}

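/* Scan an HMM and record which start (p), stop (q), transition (a) and
 * emission (b) parameters lie above log-zero (CMath::ALMOST_NEG_INFTY);
 * only these "relevant" parameters get derivative features.
 */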
bool CTOPFeatures::compute_relevant_indizes(CHMM* hmm, T_HMM_INDIZES* hmm_idx)
{
    int32_t i=0;
    int32_t j=0;

    hmm_idx->num_p=0;
    hmm_idx->num_q=0;
    hmm_idx->num_a=0;
    hmm_idx->num_b=0;

    // first pass: count the parameters that are above log-zero
    for (i=0; i<hmm->get_N(); i++)
    {
        if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
            hmm_idx->num_p++;

        if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
            hmm_idx->num_q++;

        for (j=0; j<hmm->get_N(); j++)
        {
            if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
                hmm_idx->num_a++;
        }

        for (j=0; j<hmm->get_M(); j++)
        {
            if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
                hmm_idx->num_b++;
        }
    }

    if (hmm_idx->num_p > 0)
    {
        hmm_idx->idx_p=SG_MALLOC(int32_t, hmm_idx->num_p);
        ASSERT(hmm_idx->idx_p)
    }

    if (hmm_idx->num_q > 0)
    {
        hmm_idx->idx_q=SG_MALLOC(int32_t, hmm_idx->num_q);
        ASSERT(hmm_idx->idx_q)
    }

    if (hmm_idx->num_a > 0)
    {
        hmm_idx->idx_a_rows=SG_MALLOC(int32_t, hmm_idx->num_a);
        hmm_idx->idx_a_cols=SG_MALLOC(int32_t, hmm_idx->num_a);
        ASSERT(hmm_idx->idx_a_rows)
        ASSERT(hmm_idx->idx_a_cols)
    }

    if (hmm_idx->num_b > 0)
    {
        hmm_idx->idx_b_rows=SG_MALLOC(int32_t, hmm_idx->num_b);
        hmm_idx->idx_b_cols=SG_MALLOC(int32_t, hmm_idx->num_b);
        ASSERT(hmm_idx->idx_b_rows)
        ASSERT(hmm_idx->idx_b_cols)
    }

    // second pass: record the indices of those parameters
    int32_t idx_p=0;
    int32_t idx_q=0;
    int32_t idx_a=0;
    int32_t idx_b=0;

    for (i=0; i<hmm->get_N(); i++)
    {
        if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
        {
            ASSERT(idx_p < hmm_idx->num_p)
            hmm_idx->idx_p[idx_p++]=i;
        }

        if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
        {
            ASSERT(idx_q < hmm_idx->num_q)
            hmm_idx->idx_q[idx_q++]=i;
        }

        for (j=0; j<hmm->get_N(); j++)
        {
            if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
            {
                ASSERT(idx_a < hmm_idx->num_a)
                hmm_idx->idx_a_rows[idx_a]=i;
                hmm_idx->idx_a_cols[idx_a++]=j;
            }
        }

        for (j=0; j<hmm->get_M(); j++)
        {
            if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
            {
                ASSERT(idx_b < hmm_idx->num_b)
                hmm_idx->idx_b_rows[idx_b]=i;
                hmm_idx->idx_b_cols[idx_b++]=j;
            }
        }
    }

    return true;
}

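/* The feature count mirrors compute_feature_vector(): one log-odds entry,
 * plus N*M entries per linear model, or num_p+num_q+num_a+num_b relevant
 * entries per full HMM.
 */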
int32_t CTOPFeatures::compute_num_features()
{
    int32_t num=0;

    if (pos && neg)
    {
        num+=1; // zeroth component (the posterior log-odds)

        if (poslinear)
            num+=pos->get_N()*pos->get_M();
        else
        {
            num+=pos_relevant_indizes.num_p+pos_relevant_indizes.num_q+pos_relevant_indizes.num_a+pos_relevant_indizes.num_b;
        }

        if (neglinear)
            num+=neg->get_N()*neg->get_M();
        else
        {
            num+=neg_relevant_indizes.num_p+neg_relevant_indizes.num_q+neg_relevant_indizes.num_a+neg_relevant_indizes.num_b;
        }

        //num+=1; //zeroth- component
        //num+= (poslinear) ? (pos->get_N()*pos->get_M()) : (pos->get_N()*(1+pos->get_N()+1+pos->get_M()));
        //num+= (neglinear) ? (neg->get_N()*neg->get_M()) : (neg->get_N()*(1+neg->get_N()+1+neg->get_M()));
    }
    return num;
}

void CTOPFeatures::init()
{
    pos = NULL;
    neg = NULL;
    neglinear = false;
    poslinear = false;

    memset(&pos_relevant_indizes, 0, sizeof(pos_relevant_indizes));
    memset(&neg_relevant_indizes, 0, sizeof(neg_relevant_indizes));

    unset_generic();
    //TODO serialize HMMs
    //m_parameters->add((CSGObject**) &pos, "pos", "HMM for positive class.");
    //m_parameters->add((CSGObject**) &neg, "neg", "HMM for negative class.");
    m_parameters->add(&neglinear, "neglinear", "If negative HMM is a LinearHMM");
    m_parameters->add(&poslinear, "poslinear", "If positive HMM is a LinearHMM");
}
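
A minimal usage sketch (not part of the file above): it assumes two CHMMs have already been trained, one on the positive and one on the negative class, and that the positive model has its observation sequences attached via set_observations(); the helper name build_top_features is illustrative only.

#include <shogun/distributions/HMM.h>
#include <shogun/features/TOPFeatures.h>

using namespace shogun;

void build_top_features(CHMM* pos_hmm, CHMM* neg_hmm)
{
    // size=0: no cache hint; neglin/poslin=false treats both as full HMMs.
    // The constructor calls set_models(), which sizes the feature space and
    // takes the number of vectors from pos_hmm's attached observations.
    CTOPFeatures* top=new CTOPFeatures(0, pos_hmm, neg_hmm, false, false);
    SG_REF(top);

    // precompute the num_features x num_vectors matrix of TOP features,
    // one column per observation sequence of the positive model
    top->set_feature_matrix();

    // ... hand `top` to a kernel/classifier while the reference is held ...

    SG_UNREF(top);
}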