HMSVMModel.cpp
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (C) 2012 Fernando José Iglesias García
 */

#include <shogun/structure/HMSVMModel.h>
#include <shogun/features/MatrixFeatures.h>
#include <shogun/structure/TwoStateModel.h>

using namespace shogun;

CHMSVMModel::CHMSVMModel()
: CStructuredModel()
{
    init();
}

CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs)
: CStructuredModel(features, labels)
{
    init();

    m_num_obs = num_obs;
    // Shorthand for the number of free states
    int32_t free_states = ((CHMSVMLabels*) m_labels)->get_num_states();
    // Shorthand for the number of features of the feature vector
    int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
    m_num_aux = free_states*D*(num_obs-1);

    switch (smt)
    {
        case SMT_TWO_STATE:
            m_state_model = new CTwoStateModel();
            break;
        case SMT_UNKNOWN:
        default:
            SG_ERROR("The EStateModelType given is not valid\n");
    }

    int32_t S = m_state_model->get_num_states();
    m_transmission_weights = SGMatrix< float64_t >(S,S);
    m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
}
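
/* Usage sketch (illustrative only): it assumes a CMatrixFeatures< float64_t >*
 * named feats and a CHMSVMLabels* named labs have already been built, with one
 * observation matrix and one state sequence per training example, and that the
 * observations are discretized into 3 levels:
 *
 *     CHMSVMModel* model = new CHMSVMModel(feats, labs, SMT_TWO_STATE, 3);
 *     SG_REF(model);
 *     // model->get_dim() is the length of the primal weight vector w that a
 *     // structured output solver would learn for this model.
 */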

CHMSVMModel::~CHMSVMModel()
{
    SG_UNREF(m_state_model);
}

int32_t CHMSVMModel::get_dim() const
{
    // Shorthand for the number of states
    int32_t S = ((CHMSVMLabels*) m_labels)->get_num_states();
    // Shorthand for the number of features of the feature vector
    int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();

    return S*(S + D*m_num_obs);
}
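
/* Worked example: with S = 2 free states, D = 10 features and m_num_obs = 3
 * discrete observation levels, get_dim() returns 2*(2 + 10*3) = 64, i.e.
 * 2*2 = 4 transmission scores plus 2*10*3 = 60 emission scores in w.
 */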

SGVector< float64_t > CHMSVMModel::get_joint_feature_vector(
        int32_t feat_idx,
        CStructuredData* y)
{
    // Shorthand for the number of features of the feature vector
    CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
    int32_t D = mf->get_num_features();

    // Get the sequence of labels
    CSequence* label_seq = CSequence::obtain_from_generic(y);

    // Initialize psi
    SGVector< float64_t > psi(get_dim());
    psi.zero();

    // Translate from labels sequence to state sequence
    SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
    m_transmission_weights.zero();

    for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
        m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;

    SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
    ASSERT(obs.num_rows == D && obs.num_cols == state_seq.vlen);
    m_emission_weights.zero();
    index_t aux_idx, weight_idx;

    for ( int32_t f = 0 ; f < D ; ++f )
    {
        aux_idx = f*m_num_obs;

        for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
        {
            weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
            m_emission_weights[weight_idx] += 1;
        }
    }

    m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
            D, m_num_obs);

    return psi;
}
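
/* The joint feature vector psi(x, y) stacks two groups of sufficient
 * statistics: transition counts (how many times state s is followed by state
 * s' in the state sequence derived from y) and emission counts (how many
 * times feature f takes each discretized value while the model is in state
 * s). The exact ordering of these blocks inside psi is defined by
 * CStateModel::weights_to_vector.
 */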

CResultSet* CHMSVMModel::argmax(
        SGVector< float64_t > w,
        int32_t feat_idx,
        bool const training)
{
    int32_t dim = get_dim();
    ASSERT( w.vlen == get_dim() );

    // Shorthand for the number of features of the feature vector
    CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
    int32_t D = mf->get_num_features();
    // Shorthand for the number of states
    int32_t S = m_state_model->get_num_states();

    // Distribution of start states
    SGVector< float64_t > p = m_state_model->get_start_states();
    // Distribution of stop states
    SGVector< float64_t > q = m_state_model->get_stop_states();

    // Compute the loss-augmented emission matrix:
    // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)

    SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);

    int32_t T = x.num_cols;
    SGMatrix< float64_t > E(S, T);
    E.zero();
    index_t em_idx;
    m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);

    for ( int32_t i = 0 ; i < T ; ++i )
    {
        for ( int32_t j = 0 ; j < D ; ++j )
        {
            //FIXME make independent of observation values
            em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));

            for ( int32_t s = 0 ; s < S ; ++s )
                E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
        }
    }

    // If argmax is used while training, add the loss matrix to E
    if ( training )
    {
        CSequence* ytrue =
            CSequence::obtain_from_generic(m_labels->get_label(feat_idx));

        REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
            "x^i (%d) and the length of its corresponding label y^i "
            "(%d) must be the same.\n", T, ytrue->get_data().size());

        SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);

        ASSERT(loss_matrix.num_rows == E.num_rows &&
               loss_matrix.num_cols == E.num_cols);

        SGVector< float64_t >::add(E.matrix, 1.0, E.matrix,
            1.0, loss_matrix.matrix, E.num_rows*E.num_cols);

        // Decrement the reference count corresponding to get_label above
        SG_UNREF(ytrue);
    }

    // Initialize the dynamic programming table and the traceback matrix
    SGMatrix< float64_t > dp(T, S);
    SGMatrix< float64_t > trb(T, S);
    m_state_model->reshape_transmission_params(m_transmission_weights, w);

    for ( int32_t s = 0 ; s < S ; ++s )
    {
        if ( p[s] > -CMath::INFTY )
        {
            // dp(0,s) = E(s,0)
            dp(0,s) = E[s];
        }
        else
        {
            dp(0,s) = -CMath::INFTY;
        }
    }

    // Viterbi algorithm
    int32_t idx;
    float64_t tmp_score, e, a;

    for ( int32_t i = 1 ; i < T ; ++i )
    {
        for ( int32_t cur = 0 ; cur < S ; ++cur )
        {
            idx = cur*T + i;

            dp[idx] = -CMath::INFTY;
            trb[idx] = -1;

            // e = E(cur,i)
            e = E[i*S + cur];

            for ( int32_t prev = 0 ; prev < S ; ++prev )
            {
                // aij = m_transmission_weights(prev, cur)
                a = m_transmission_weights[cur*S + prev];

                if ( a > -CMath::INFTY )
                {
                    // tmp_score = e + a + dp(i-1, prev)
                    tmp_score = e + a + dp[prev*T + i-1];

                    if ( tmp_score > dp[idx] )
                    {
                        dp[idx] = tmp_score;
                        trb[idx] = prev;
                    }
                }
            }
        }
    }

    // Trace back the most likely sequence of states
    SGVector< int32_t > opt_path(T);
    CResultSet* ret = new CResultSet();
    SG_REF(ret);
    ret->score = -CMath::INFTY;
    opt_path[T-1] = -1;

    for ( int32_t s = 0 ; s < S ; ++s )
    {
        idx = s*T + T-1;

        if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
        {
            ret->score = dp[idx];
            opt_path[T-1] = s;
        }
    }

    for ( int32_t i = T-1 ; i > 0 ; --i )
        opt_path[i-1] = trb[opt_path[i]*T + i];

    // Populate the CResultSet object to return
    CSequence* ypred = m_state_model->states_to_labels(opt_path);

    ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
    ret->argmax = ypred;
    if ( training )
    {
        ret->delta = CStructuredModel::delta_loss(feat_idx, ypred);
        ret->psi_truth = CStructuredModel::get_joint_feature_vector(
            feat_idx, feat_idx);
        ret->score -= SGVector< float64_t >::dot(w.vector,
            ret->psi_truth.vector, dim);
    }

    return ret;
}
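
/* The recursion above is the standard Viterbi update applied to the
 * (optionally loss-augmented) scores:
 *
 *     dp(i, cur) = E(cur, i) + max_prev [ a(prev, cur) + dp(i-1, prev) ]
 *
 * where a(prev, cur) = m_transmission_weights(prev, cur) and the maximization
 * runs over admissible predecessor states. trb(i, cur) stores the maximizing
 * prev, so the highest-scoring state sequence is recovered by starting at the
 * best admissible stop state and walking the traceback matrix backwards.
 */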

float64_t CHMSVMModel::delta_loss(CStructuredData* y1, CStructuredData* y2)
{
    CSequence* seq1 = CSequence::obtain_from_generic(y1);
    CSequence* seq2 = CSequence::obtain_from_generic(y2);

    // Compute the Hamming loss, i.e. the number of positions at which the
    // two sequences differ
    return m_state_model->loss(seq1, seq2);
}
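
/* Example (illustrative): for label sequences y1 = [0 0 1 1 0] and
 * y2 = [0 1 1 0 0], a Hamming loss counts the mismatching positions, here 2
 * (positions 1 and 3, zero-based). The concrete value returned is whatever
 * loss the underlying CStateModel implements for the pair of sequences.
 */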

void CHMSVMModel::init_opt(
        SGMatrix< float64_t > & A,
        SGVector< float64_t > a,
        SGMatrix< float64_t > B,
        SGVector< float64_t > & b,
        SGVector< float64_t > lb,
        SGVector< float64_t > ub,
        SGMatrix< float64_t > & C)
{
    // Shorthand for the number of free states
    int32_t S = ((CHMSVMLabels*) m_labels)->get_num_states();
    // Shorthand for the number of features of the feature vector
    int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();

    // Monotonicity constraints for feature scoring functions
    SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);

    // Quadratic regularizer

    float64_t C_small = 5.0;
    float64_t C_smooth = 10.0;
    // TODO change the representation of C to sparse matrix
    C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
    C.zero();
    for ( int32_t i = 0 ; i < get_dim() ; ++i )
        C(i,i) = C_small;
    for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
        C(i,i) = C_smooth;

    // Smoothness constraints

    // For each auxiliary variable, there are two different constraints
    // TODO change the representation of A to sparse matrix
    A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
    A.zero();

    // Indices to the beginning of the blocks of scores. Each block is
    // formed by the scores of a pair (state, feature)
    SGVector< int32_t > score_starts(S*D);
    for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += m_num_obs, ++k )
        score_starts[k] = idx;

    // Indices to the beginning of the blocks of variables for smoothness
    SGVector< int32_t > aux_starts_smooth(S*D);
    for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += m_num_obs-1, ++k )
        aux_starts_smooth[k] = idx;

    // Bound the difference between adjacent score values from above and
    // below by an auxiliary variable (which is then regularized
    // quadratically)

    int32_t con_idx = 0, scr_idx, aux_idx;

    for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
    {
        scr_idx = score_starts[i];
        aux_idx = aux_starts_smooth[i];

        for ( int32_t j = 0 ; j < m_num_obs-1 ; ++j )
        {
            A(con_idx, scr_idx) = 1;
            A(con_idx, scr_idx+1) = -1;

            if ( monotonicity[i] != 1 )
                A(con_idx, aux_idx) = -1;
            ++con_idx;

            A(con_idx, scr_idx) = -1;
            A(con_idx, scr_idx+1) = 1;

            if ( monotonicity[i] != -1 )
                A(con_idx, aux_idx) = -1;
            ++con_idx;

            ++scr_idx, ++aux_idx;
        }
    }

    // Bounds for the smoothness constraints
    b = SGVector< float64_t >(2*m_num_aux);
    b.zero();
}
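
/* For every pair of adjacent emission scores w_k, w_{k+1} of a (state,
 * feature) block there is one auxiliary variable u and the two rows
 *
 *      w_k - w_{k+1} - u <= 0
 *     -w_k + w_{k+1} - u <= 0
 *
 * assuming the solver reads A*[w; u] <= b with b = 0. Together they enforce
 * |w_k - w_{k+1}| <= u, and u is pushed towards zero by the C_smooth block of
 * the quadratic regularizer. When monotonicity is required for that block,
 * the -u term is dropped from one of the two rows, so the corresponding
 * score difference is constrained to be one-signed instead of merely bounded.
 */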

bool CHMSVMModel::check_training_setup() const
{
    // Shorthand for the labels in the correct type
    CHMSVMLabels* hmsvm_labels = (CHMSVMLabels*) m_labels;
    // Frequency of each state
    SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
    state_freq.zero();

    CSequence* seq;
    int32_t state;
    for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
    {
        seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));

        SGVector<int32_t> seq_data = seq->get_data();
        for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
        {
            state = seq_data[j];

            if ( state < 0 || state >= hmsvm_labels->get_num_states() )
            {
                SG_ERROR("Found state out of {0, 1, ..., "
                    "num_states-1}\n");
                return false;
            }
            else
            {
                ++state_freq[state];
            }
        }

        // Decrement the reference count increased by get_label
        SG_UNREF(seq);
    }

    for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
    {
        if ( state_freq[i] <= 0 )
        {
            SG_ERROR("State %d has never appeared in the training data\n", i);
            return false;
        }
    }

    return true;
}

void CHMSVMModel::init()
{
    SG_ADD(&m_num_states, "m_num_states", "The number of states", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
    SG_ADD(&m_transmission_weights, "m_transmission_weights",
        "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
    SG_ADD(&m_emission_weights, "m_emission_weights",
        "Emission weights used in Viterbi", MS_NOT_AVAILABLE);

    m_num_states = 0;
    m_num_obs = 0;
    m_num_aux = 0;
    m_state_model = NULL;
}

int32_t CHMSVMModel::get_num_aux() const
{
    return m_num_aux;
}

int32_t CHMSVMModel::get_num_aux_con() const
{
    return 2*m_num_aux;
}
