SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LinearHMM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
14 
15 #include <shogun/base/Parameter.h>
16 
19 
20 using namespace shogun;
21 
23 {
24  init();
25 }
26 
28 : CDistribution()
29 {
30  init();
31 
32  set_features(f);
34  num_symbols = (int32_t) f->get_num_symbols();
36 }
37 
38 CLinearHMM::CLinearHMM(int32_t p_num_features, int32_t p_num_symbols)
39 : CDistribution()
40 {
41  init();
42 
43  sequence_length = p_num_features;
44  num_symbols = p_num_symbols;
46 }
47 
49 {
50  SG_FREE(transition_probs);
51  SG_FREE(log_transition_probs);
52 }
53 
55 {
56  if (data)
57  {
58  if (data->get_feature_class() != C_STRING ||
59  data->get_feature_type() != F_WORD)
60  {
61  SG_ERROR("Expected features of class string type word!\n")
62  }
63  set_features(data);
64  }
65  SG_FREE(transition_probs);
66  SG_FREE(log_transition_probs);
67  int32_t* int_transition_probs=SG_MALLOC(int32_t, num_params);
68 
69  int32_t vec;
70  int32_t i;
71 
72  for (i=0; i< num_params; i++)
73  int_transition_probs[i]=0;
74 
75  for (vec=0; vec<features->get_num_vectors(); vec++)
76  {
77  int32_t len;
78  bool free_vec;
79 
80  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
81  get_feature_vector(vec, len, free_vec);
82 
83  //just count the symbols per position -> transition_probsogram
84  for (int32_t feat=0; feat<len ; feat++)
85  int_transition_probs[feat*num_symbols+vector[feat]]++;
86 
88  free_feature_vector(vector, vec, free_vec);
89  }
90 
91  //trade memory for speed
92  transition_probs=SG_MALLOC(float64_t, num_params);
93  log_transition_probs=SG_MALLOC(float64_t, num_params);
94 
95  for (i=0;i<sequence_length;i++)
96  {
97  for (int32_t j=0; j<num_symbols; j++)
98  {
99  float64_t sum=0;
100  int32_t offs=i*num_symbols+
102  get_masked_symbols((uint16_t)j,(uint8_t) 254);
103  int32_t original_num_symbols=(int32_t)
105  get_original_num_symbols();
106 
107  for (int32_t k=0; k<original_num_symbols; k++)
108  sum+=int_transition_probs[offs+k];
109 
110  transition_probs[i*num_symbols+j]=
111  (int_transition_probs[i*num_symbols+j]+pseudo_count)/
113  get_original_num_symbols()*pseudo_count);
114  log_transition_probs[i*num_symbols+j]=
115  log(transition_probs[i*num_symbols+j]);
116  }
117  }
118 
119  SG_FREE(int_transition_probs);
120  return true;
121 }
122 
124  const int32_t* indizes, int32_t num_indizes, float64_t pseudo)
125 {
126  SG_FREE(transition_probs);
127  SG_FREE(log_transition_probs);
128  int32_t* int_transition_probs=SG_MALLOC(int32_t, num_params);
129  int32_t vec;
130  int32_t i;
131 
132  for (i=0; i< num_params; i++)
133  int_transition_probs[i]=0;
134 
135  for (vec=0; vec<num_indizes; vec++)
136  {
137  int32_t len;
138  bool free_vec;
139 
140  ASSERT(indizes[vec]>=0 &&
141  indizes[vec]<((CStringFeatures<uint16_t>*) features)->
142  get_num_vectors());
143  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
144  get_feature_vector(indizes[vec], len, free_vec);
146  free_feature_vector(vector, indizes[vec], free_vec);
147 
148  //just count the symbols per position -> transition_probsogram
149  //
150  for (int32_t feat=0; feat<len ; feat++)
151  int_transition_probs[feat*num_symbols+vector[feat]]++;
152  }
153 
154  //trade memory for speed
155  transition_probs=SG_MALLOC(float64_t, num_params);
156  log_transition_probs=SG_MALLOC(float64_t, num_params);
157 
158  for (i=0;i<sequence_length;i++)
159  {
160  for (int32_t j=0; j<num_symbols; j++)
161  {
162  float64_t sum=0;
163  int32_t original_num_symbols=(int32_t)
165  get_original_num_symbols();
166  for (int32_t k=0; k<original_num_symbols; k++)
167  {
168  sum+=int_transition_probs[i*num_symbols+
170  get_masked_symbols((uint16_t)j,(uint8_t) 254)+k];
171  }
172 
173  transition_probs[i*num_symbols+j]=
174  (int_transition_probs[i*num_symbols+j]+pseudo)/
176  get_original_num_symbols()*pseudo);
177  log_transition_probs[i*num_symbols+j]=
178  log(transition_probs[i*num_symbols+j]);
179  }
180  }
181 
182  SG_FREE(int_transition_probs);
183  return true;
184 }
185 
186 float64_t CLinearHMM::get_log_likelihood_example(uint16_t* vector, int32_t len)
187 {
188  float64_t result=log_transition_probs[vector[0]];
189 
190  for (int32_t i=1; i<len; i++)
191  result+=log_transition_probs[i*num_symbols+vector[i]];
192 
193  return result;
194 }
195 
197 {
198  int32_t len;
199  bool free_vec;
200  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
201  get_feature_vector(num_example, len, free_vec);
202  float64_t result=get_log_likelihood_example(vector, len);
203 
205  free_feature_vector(vector, num_example, free_vec);
206 
207  return result;
208 }
209 
210 float64_t CLinearHMM::get_likelihood_example(uint16_t* vector, int32_t len)
211 {
212  float64_t result=transition_probs[vector[0]];
213 
214  for (int32_t i=1; i<len; i++)
215  result*=transition_probs[i*num_symbols+vector[i]];
216 
217  return result;
218 }
219 
221 {
222  int32_t len;
223  bool free_vec;
224  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
225  get_feature_vector(num_example, len, free_vec);
226 
227  float64_t result=get_likelihood_example(vector, len);
228 
230  free_feature_vector(vector, num_example, free_vec);
231 
232  return result;
233 }
234 
235 float64_t CLinearHMM::get_log_derivative(int32_t num_param, int32_t num_example)
236 {
237  int32_t len;
238  bool free_vec;
239  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
240  get_feature_vector(num_example, len, free_vec);
241  float64_t result=0;
242  int32_t position=num_param/num_symbols;
243  ASSERT(position>=0 && position<len)
244  uint16_t sym=(uint16_t) (num_param-position*num_symbols);
245 
246  if (vector[position]==sym && transition_probs[num_param]!=0)
247  result=1.0/transition_probs[num_param];
249  free_feature_vector(vector, num_example, free_vec);
250 
251  return result;
252 }
253 
255 {
257 }
258 
260 {
261  ASSERT(probs.vlen == num_params)
262 
265 
266  if (!transition_probs)
268 
269  for (int32_t i=0; i<num_params; i++)
270  {
271  transition_probs[i]=probs.vector[i];
273  }
274 
275  return true;
276 }
277 
279 {
281 }
282 
284 {
285  ASSERT(probs.vlen == num_params)
286 
289 
290  if (!transition_probs)
292 
293  for (int32_t i=0; i<num_params; i++)
294  {
295  log_transition_probs[i]=probs.vector[i];
297  }
298 
299  return true;
300 }
301 
303 {
305 
307 }
308 
309 void CLinearHMM::init()
310 {
311  sequence_length = 0;
312  num_symbols = 0;
313  num_params = 0;
314  transition_probs = NULL;
315  log_transition_probs = NULL;
316 
318  "transition_probs", "Transition probabilities.");
320  "log_transition_probs", "Transition probabilities (logspace).");
321 }

SHOGUN Machine Learning Toolbox - Documentation