SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMSVMModel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Fernando José Iglesias García
8  * Copyright (C) 2012 Fernando José Iglesias García
9  */
10 
14 #include <shogun/structure/Plif.h>
15 
16 using namespace shogun;
17 
20 {
21  init();
22 }
23 
24 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs)
25 : CStructuredModel(features, labels)
26 {
27  init();
28  m_num_obs = num_obs;
29  m_num_plif_nodes = 20;
30  m_use_plifs = use_plifs;
31 
32  switch (smt)
33  {
34  case SMT_TWO_STATE:
35  m_state_model = new CTwoStateModel();
36  break;
37  case SMT_UNKNOWN:
38  default:
39  SG_ERROR("The EStateModelType given is not valid\n")
40  }
41 }
42 
44 {
45  SG_UNREF(m_state_model);
46  SG_UNREF(m_plif_matrix);
47 }
48 
49 int32_t CHMSVMModel::get_dim() const
50 {
51  // Shorthand for the number of free states
52  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
54  int32_t D = mf->get_num_features();
55 
56  if ( m_use_plifs )
57  return free_states*(free_states + D*m_num_plif_nodes);
58  else
59  return free_states*(free_states + D*m_num_obs);
60 }
61 
63  int32_t feat_idx,
64  CStructuredData* y)
65 {
66  // Shorthand for the number of features of the feature vector
68  int32_t D = mf->get_num_features();
69 
70  // Get the sequence of labels
72 
73  // Initialize psi
75  psi.zero();
76 
77  // Translate from labels sequence to state sequence
78  SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
79  m_transmission_weights.zero();
80 
81  for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
82  m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
83 
84  SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
85  REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen,
86  "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
87  obs.num_rows, D, obs.num_cols, state_seq.vlen)
88  m_emission_weights.zero();
89  index_t aux_idx, weight_idx;
90 
91  if ( !m_use_plifs ) // Do not use PLiFs
92  {
93  for ( int32_t f = 0 ; f < D ; ++f )
94  {
95  aux_idx = f*m_num_obs;
96 
97  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
98  {
99  weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
100  m_emission_weights[weight_idx] += 1;
101  }
102  }
103 
104  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
105  D, m_num_obs);
106  }
107  else // Use PLiFs
108  {
109  int32_t S = m_state_model->get_num_states();
110 
111  for ( int32_t f = 0 ; f < D ; ++f )
112  {
113  aux_idx = f*m_num_plif_nodes;
114 
115  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
116  {
117  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]);
118  SGVector<float64_t> limits = plif->get_plif_limits();
119  // The number of supporting points smaller or equal than value
120  int32_t count = 0;
121  // The observation value
122  float64_t value = obs(f,j);
123 
124  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
125  {
126  if ( limits[i] <= value )
127  ++count;
128  else
129  break;
130  }
131 
132  weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;
133 
134  if ( count == 0 )
135  m_emission_weights[weight_idx] += 1;
136  else if ( count == m_num_plif_nodes )
137  m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
138  else
139  {
140  m_emission_weights[weight_idx + count] +=
141  (value-limits[count-1]) / (limits[count]-limits[count-1]);
142 
143  m_emission_weights[weight_idx + count-1] +=
144  (limits[count]-value) / (limits[count]-limits[count-1]);
145  }
146 
147  SG_UNREF(plif);
148  }
149  }
150 
151  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
152  D, m_num_plif_nodes);
153  }
154 
155  return psi;
156 }
157 
160  int32_t feat_idx,
161  bool const training)
162 {
163  int32_t dim = get_dim();
164  ASSERT( w.vlen == get_dim() )
165 
166  // Shorthand for the number of features of the feature vector
168  int32_t D = mf->get_num_features();
169  // Shorthand for the number of states
170  int32_t S = m_state_model->get_num_states();
171 
172  if ( m_use_plifs )
173  {
174  REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
175  "the use_plifs option?\n");
176  REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the "
177  "feature dimension and/or number of states changed from training to prediction?\n");
178  }
179 
180  // Distribution of start states
181  SGVector< float64_t > p = m_state_model->get_start_states();
182  // Distribution of stop states
183  SGVector< float64_t > q = m_state_model->get_stop_states();
184 
185  // Compute the loss-augmented emission matrix:
186  // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
187 
188  SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
189 
190  int32_t T = x.num_cols;
191  SGMatrix< float64_t > E(S, T);
192  E.zero();
193 
194  if ( !m_use_plifs ) // Do not use PLiFs
195  {
196  index_t em_idx;
197  m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
198 
199  for ( int32_t i = 0 ; i < T ; ++i )
200  {
201  for ( int32_t j = 0 ; j < D ; ++j )
202  {
203  //FIXME make independent of observation values
204  em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
205 
206  for ( int32_t s = 0 ; s < S ; ++s )
207  E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
208  }
209  }
210  }
211  else // Use PLiFs
212  {
213  m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes);
214 
215  for ( int32_t i = 0 ; i < T ; ++i )
216  {
217  for ( int32_t f = 0 ; f < D ; ++f )
218  {
219  for ( int32_t s = 0 ; s < S ; ++s )
220  {
221  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s);
222  E(s,i) += plif->lookup( x(f,i) );
223  SG_UNREF(plif);
224  }
225  }
226  }
227  }
228 
229  // If argmax used while training, add to E the loss matrix (loss-augmented inference)
230  if ( training )
231  {
232  CSequence* ytrue =
234 
235  REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
236  "x^i (%d) and the length of its corresponding label y^i "
237  "(%d) must be the same.\n", T, ytrue->get_data().size());
238 
239  SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
240 
241  ASSERT(loss_matrix.num_rows == E.num_rows &&
242  loss_matrix.num_cols == E.num_cols);
243 
245  1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
246 
247  // Decrement the reference count corresponding to get_label above
248  SG_UNREF(ytrue);
249  }
250 
251  // Initialize the dynamic programming table and the traceback matrix
252  SGMatrix< float64_t > dp(T, S);
253  SGMatrix< float64_t > trb(T, S);
254  m_state_model->reshape_transmission_params(m_transmission_weights, w);
255 
256  for ( int32_t s = 0 ; s < S ; ++s )
257  {
258  if ( p[s] > -CMath::INFTY )
259  {
260  // dp(0,s) = E(s,0)
261  dp(0,s) = E[s];
262  }
263  else
264  {
265  dp(0,s) = -CMath::INFTY;
266  }
267  }
268 
269  // Viterbi algorithm
270  int32_t idx;
271  float64_t tmp_score, e, a;
272 
273  for ( int32_t i = 1 ; i < T ; ++i )
274  {
275  for ( int32_t cur = 0 ; cur < S ; ++cur )
276  {
277  idx = cur*T + i;
278 
279  dp[idx] = -CMath::INFTY;
280  trb[idx] = -1;
281 
282  // e = E(cur,i)
283  e = E[i*S + cur];
284 
285  for ( int32_t prev = 0 ; prev < S ; ++prev )
286  {
287  // aij = m_transmission_weights(prev, cur)
288  a = m_transmission_weights[cur*S + prev];
289 
290  if ( a > -CMath::INFTY )
291  {
292  // tmp_score = e + a + dp(i-1, prev)
293  tmp_score = e + a + dp[prev*T + i-1];
294 
295  if ( tmp_score > dp[idx] )
296  {
297  dp[idx] = tmp_score;
298  trb[idx] = prev;
299  }
300  }
301  }
302  }
303  }
304 
305  // Trace back the most likely sequence of states
306  SGVector< int32_t > opt_path(T);
307  CResultSet* ret = new CResultSet();
308  SG_REF(ret);
309  ret->psi_computed = true;
310  ret->score = -CMath::INFTY;
311  opt_path[T-1] = -1;
312 
313  for ( int32_t s = 0 ; s < S ; ++s )
314  {
315  idx = s*T + T-1;
316 
317  if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
318  {
319  ret->score = dp[idx];
320  opt_path[T-1] = s;
321  }
322  }
323 
324  REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
325  "Maybe the state model used cannot produce such sequence.\n"
326  "If using the TwoStateModel, please use sequences of length greater than two.\n");
327 
328  for ( int32_t i = T-1 ; i > 0 ; --i )
329  opt_path[i-1] = trb[opt_path[i]*T + i];
330 
331  // Populate the CResultSet object to return
332  CSequence* ypred = m_state_model->states_to_labels(opt_path);
333 
334  ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
335  ret->argmax = ypred;
336  if ( training )
337  {
338  ret->delta = CStructuredModel::delta_loss(feat_idx, ypred);
339  ret->psi_truth = CStructuredModel::get_joint_feature_vector(feat_idx, feat_idx);
341  }
342 
343  return ret;
344 }
345 
347 {
350 
351  // Compute the Hamming loss, number of distinct elements in the sequences
352  return m_state_model->loss(seq1, seq2);
353 }
354 
356  float64_t regularization,
364 {
365  // Shorthand for the number of free states (i.e. states for which parameters are learnt)
366  int32_t S = ((CSequenceLabels*) m_labels)->get_num_states();
367  // Shorthand for the number of features of the feature vector
368  int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
369 
370  // Monotonicity constraints for feature scoring functions
371  SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
372 
373  // Quadratic regularization
374 
375  float64_t C_small = regularization;
376  float64_t C_smooth = 0.02*regularization;
377  // TODO change the representation of C to sparse matrix
378  C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
379  C.zero();
380  for ( int32_t i = 0 ; i < get_dim() ; ++i )
381  C(i,i) = C_small;
382  for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
383  C(i,i) = C_smooth;
384 
385  // Smoothness constraints
386 
387  // For each auxiliary variable, there are two different constraints
388  // TODO change the representation of A to sparse matrix
389  A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
390  A.zero();
391 
392  // Indices to the beginning of the blocks of scores. Each block is
393  // formed by the scores of a pair (state, feature)
394  SGVector< int32_t > score_starts(S*D);
395  int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;
396  for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
397  score_starts[k] = idx;
398 
399  // Indices to the beginning of the blocks of variables for smoothness
400  SGVector< int32_t > aux_starts_smooth(S*D);
401  for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
402  aux_starts_smooth[k] = idx;
403 
404  // Bound the difference between adjacent score values from above and
405  // below by an auxiliary variable (which then is regularized
406  // quadratically)
407 
408  int32_t con_idx = 0, scr_idx, aux_idx;
409 
410  for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
411  {
412  scr_idx = score_starts[i];
413  aux_idx = aux_starts_smooth[i];
414 
415  for ( int32_t j = 0 ; j < delta-1 ; ++j )
416  {
417  A(con_idx, scr_idx) = 1;
418  A(con_idx, scr_idx+1) = -1;
419 
420  if ( monotonicity[i] != 1 )
421  A(con_idx, aux_idx) = -1;
422  ++con_idx;
423 
424  A(con_idx, scr_idx) = -1;
425  A(con_idx, scr_idx+1) = 1;
426 
427  if ( monotonicity[i] != -1 )
428  A(con_idx, aux_idx) = -1;
429  ++con_idx;
430 
431  ++scr_idx, ++aux_idx;
432  }
433  }
434 
435  // Bounds for the smoothness constraints
436  b = SGVector< float64_t >(2*m_num_aux);
437  b.zero();
438 }
439 
441 {
442  // Shorthand for the labels in the correct type
443  CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels;
444  // Frequency of each state
445  SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
446  state_freq.zero();
447 
448  CSequence* seq;
449  int32_t state;
450  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
451  {
452  seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
453 
454  SGVector<int32_t> seq_data = seq->get_data();
455  for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
456  {
457  state = seq_data[j];
458 
459  if ( state < 0 || state >= hmsvm_labels->get_num_states() )
460  {
461  SG_ERROR("Found state out of {0, 1, ..., "
462  "num_states-1}\n");
463  return false;
464  }
465  else
466  {
467  ++state_freq[state];
468  }
469  }
470 
471  // Decrement the reference count increased by get_label
472  SG_UNREF(seq);
473  }
474 
475  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
476  {
477  if ( state_freq[i] <= 0 )
478  {
479  SG_ERROR("What? State %d has never appeared\n", i)
480  return false;
481  }
482  }
483 
484  return true;
485 }
486 
487 void CHMSVMModel::init()
488 {
489  SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
490  SG_ADD(&m_transmission_weights, "m_transmission_weights",
491  "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
492  SG_ADD(&m_emission_weights, "m_emission_weights",
493  "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
494  SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF",
495  MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter
496  SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE);
497 
498  m_num_obs = 0;
499  m_num_aux = 0;
500  m_use_plifs = false;
501  m_state_model = NULL;
502  m_plif_matrix = NULL;
503  m_num_plif_nodes = 0;
504 }
505 
507 {
508  return m_num_aux;
509 }
510 
512 {
513  return 2*m_num_aux;
514 }
515 
516 void CHMSVMModel::set_use_plifs(bool use_plifs)
517 {
518  m_use_plifs = use_plifs;
519 }
520 
522 {
523  // Shorthands for the number of states, the matrix features and their dimension
524  int32_t S = m_state_model->get_num_states();
526  int32_t D = mf->get_num_features();
527 
528  // Transmission and emission weights allocation
529  m_transmission_weights = SGMatrix< float64_t >(S,S);
530  if ( m_use_plifs )
531  m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes);
532  else
533  m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
534 
535  // Auxiliary variables
536 
537  // Shorthand for the number of free states
538  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
539  if ( m_use_plifs )
540  m_num_aux = free_states*D*(m_num_plif_nodes-1);
541  else
542  m_num_aux = free_states*D*(m_num_obs-1);
543 
544  if ( m_use_plifs )
545  {
546  // Initialize PLiF matrix
547  m_plif_matrix = new CDynamicObjectArray(S*D);
548  SG_REF(m_plif_matrix);
549 
550  // Determine the x values for the supporting points of the PLiFs
551 
552  // Count the number of points per feature, using all the feature vectors
553  int32_t N = 0;
554  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
555  {
556  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
557  N += feat_vec.num_cols;
558  }
559 
560  // Choose the supporting points so that roughly the same number of points fall in each bin
561  SGVector< float64_t > a = SGVector< float64_t >::linspace_vec(1, N, m_num_plif_nodes+1);
562  SGVector< index_t > signal_idxs(m_num_plif_nodes);
563  for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
564  signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1;
565 
566  SGVector< float64_t > signal(N);
567  index_t idx; // used to populate signal
568  for ( int32_t f = 0 ; f < D ; ++f )
569  {
570  // Get the points of feature f of all the feature vectors
571  idx = 0;
572  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
573  {
574  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
575  for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
576  signal[idx++] = feat_vec(f,j);
577  }
578 
579  signal.qsort();
580  SGVector< float64_t > limits(m_num_plif_nodes);
581  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
582  limits[i] = signal[ signal_idxs[i] ];
583 
584  // Set the PLiFs' supporting points
585  for ( int32_t s = 0 ; s < S ; ++s )
586  {
587  CPlif* plif = new CPlif(m_num_plif_nodes);
588  plif->set_plif_limits(limits);
589  plif->set_min_value(-CMath::INFTY);
591  m_plif_matrix->push_back(plif);
592  }
593  }
594  }
595 }
596 
598 {
599  return m_transmission_weights;
600 }
601 
603 {
604  return m_emission_weights;
605 }
606 
608 {
609  SG_REF(m_state_model);
610  return m_state_model;
611 }

SHOGUN Machine Learning Toolbox - Documentation