SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LinearHMM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
14 
15 #include <shogun/base/Parameter.h>
16 
19 
20 using namespace shogun;
21 
23 {
24  init();
25 }
26 
28 : CDistribution()
29 {
30  init();
31 
32  set_features(f);
34  num_symbols = (int32_t) f->get_num_symbols();
36 }
37 
38 CLinearHMM::CLinearHMM(int32_t p_num_features, int32_t p_num_symbols)
39 : CDistribution()
40 {
41  init();
42 
43  sequence_length = p_num_features;
44  num_symbols = p_num_symbols;
46 }
47 
49 {
50  SG_FREE(transition_probs);
51  SG_FREE(log_transition_probs);
52 }
53 
55 {
56  if (data)
57  {
58  if (data->get_feature_class() != C_STRING ||
59  data->get_feature_type() != F_WORD)
60  {
61  SG_ERROR("Expected features of class string type word!\n")
62  }
63  set_features(data);
64  }
65  SG_FREE(transition_probs);
66  SG_FREE(log_transition_probs);
67  int32_t* int_transition_probs=SG_MALLOC(int32_t, num_params);
68 
69  int32_t vec;
70  int32_t i;
71 
72  for (i=0; i< num_params; i++)
73  int_transition_probs[i]=0;
74 
75  for (vec=0; vec<features->get_num_vectors(); vec++)
76  {
77  int32_t len;
78  bool free_vec;
79 
80  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
81  get_feature_vector(vec, len, free_vec);
82 
83  //just count the symbols per position -> transition_probsogram
84  for (int32_t feat=0; feat<len ; feat++)
85  int_transition_probs[feat*num_symbols+vector[feat]]++;
86 
88  free_feature_vector(vector, vec, free_vec);
89  }
90 
91  //trade memory for speed
92  transition_probs=SG_MALLOC(float64_t, num_params);
93  log_transition_probs=SG_MALLOC(float64_t, num_params);
94 
95  for (i=0;i<sequence_length;i++)
96  {
97  for (int32_t j=0; j<num_symbols; j++)
98  {
99  float64_t sum=0;
100  int32_t offs=i*num_symbols+
102  get_masked_symbols((uint16_t)j,(uint8_t) 254);
103  int32_t original_num_symbols=(int32_t)
105  get_original_num_symbols();
106 
107  for (int32_t k=0; k<original_num_symbols; k++)
108  sum+=int_transition_probs[offs+k];
109 
110  transition_probs[i*num_symbols+j]=
111  (int_transition_probs[i*num_symbols+j]+pseudo_count)/
113  get_original_num_symbols()*pseudo_count);
114  log_transition_probs[i*num_symbols+j]=
115  log(transition_probs[i*num_symbols+j]);
116  }
117  }
118 
119  SG_FREE(int_transition_probs);
120  return true;
121 }
122 
124  const int32_t* indizes, int32_t num_indizes, float64_t pseudo)
125 {
126  SG_FREE(transition_probs);
127  SG_FREE(log_transition_probs);
128  int32_t* int_transition_probs=SG_MALLOC(int32_t, num_params);
129  int32_t vec;
130  int32_t i;
131 
132  for (i=0; i< num_params; i++)
133  int_transition_probs[i]=0;
134 
135  for (vec=0; vec<num_indizes; vec++)
136  {
137  int32_t len;
138  bool free_vec;
139 
140  ASSERT(indizes[vec]>=0 &&
141  indizes[vec]<((CStringFeatures<uint16_t>*) features)->
142  get_num_vectors());
143  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
144  get_feature_vector(indizes[vec], len, free_vec);
146  free_feature_vector(vector, indizes[vec], free_vec);
147 
148  //just count the symbols per position -> transition_probsogram
149  //
150  for (int32_t feat=0; feat<len ; feat++)
151  int_transition_probs[feat*num_symbols+vector[feat]]++;
152  }
153 
154  //trade memory for speed
155  transition_probs=SG_MALLOC(float64_t, num_params);
156  log_transition_probs=SG_MALLOC(float64_t, num_params);
157 
158  for (i=0;i<sequence_length;i++)
159  {
160  for (int32_t j=0; j<num_symbols; j++)
161  {
162  float64_t sum=0;
163  int32_t original_num_symbols=(int32_t)
165  get_original_num_symbols();
166  for (int32_t k=0; k<original_num_symbols; k++)
167  {
168  sum+=int_transition_probs[i*num_symbols+
170  get_masked_symbols((uint16_t)j,(uint8_t) 254)+k];
171  }
172 
173  transition_probs[i*num_symbols+j]=
174  (int_transition_probs[i*num_symbols+j]+pseudo)/
176  get_original_num_symbols()*pseudo);
177  log_transition_probs[i*num_symbols+j]=
178  log(transition_probs[i*num_symbols+j]);
179  }
180  }
181 
182  SG_FREE(int_transition_probs);
183  return true;
184 }
185 
186 float64_t CLinearHMM::get_log_likelihood_example(uint16_t* vector, int32_t len)
187 {
188  float64_t result=log_transition_probs[vector[0]];
189 
190  for (int32_t i=1; i<len; i++)
191  result+=log_transition_probs[i*num_symbols+vector[i]];
192 
193  return result;
194 }
195 
197 {
198  int32_t len;
199  bool free_vec;
200  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
201  get_feature_vector(num_example, len, free_vec);
202  float64_t result=log_transition_probs[vector[0]];
203 
204  for (int32_t i=1; i<len; i++)
205  result+=log_transition_probs[i*num_symbols+vector[i]];
206 
208  free_feature_vector(vector, num_example, free_vec);
209 
210  return result;
211 }
212 
213 float64_t CLinearHMM::get_likelihood_example(uint16_t* vector, int32_t len)
214 {
215  float64_t result=transition_probs[vector[0]];
216 
217  for (int32_t i=1; i<len; i++)
218  result*=transition_probs[i*num_symbols+vector[i]];
219 
220  return result;
221 }
222 
223 float64_t CLinearHMM::get_log_derivative(int32_t num_param, int32_t num_example)
224 {
225  int32_t len;
226  bool free_vec;
227  uint16_t* vector=((CStringFeatures<uint16_t>*) features)->
228  get_feature_vector(num_example, len, free_vec);
229  float64_t result=0;
230  int32_t position=num_param/num_symbols;
231  ASSERT(position>=0 && position<len)
232  uint16_t sym=(uint16_t) (num_param-position*num_symbols);
233 
234  if (vector[position]==sym && transition_probs[num_param]!=0)
235  result=1.0/transition_probs[num_param];
237  free_feature_vector(vector, num_example, free_vec);
238 
239  return result;
240 }
241 
243 {
245 }
246 
248 {
249  ASSERT(probs.vlen == num_params)
250 
253 
254  if (!transition_probs)
256 
257  for (int32_t i=0; i<num_params; i++)
258  {
259  transition_probs[i]=probs.vector[i];
261  }
262 
263  return true;
264 }
265 
267 {
269 }
270 
272 {
273  ASSERT(probs.vlen == num_params)
274 
277 
278  if (!transition_probs)
280 
281  for (int32_t i=0; i<num_params; i++)
282  {
283  log_transition_probs[i]=probs.vector[i];
285  }
286 
287  return true;
288 }
289 
291 {
293 
295 }
296 
297 void CLinearHMM::init()
298 {
299  sequence_length = 0;
300  num_symbols = 0;
301  num_params = 0;
302  transition_probs = NULL;
303  log_transition_probs = NULL;
304 
306  "transition_probs", "Transition probabilities.");
308  "log_transition_probs", "Transition probabilities (logspace).");
309 }

SHOGUN Machine Learning Toolbox - Documentation