SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMSVMModel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Fernando José Iglesias García
8  * Copyright (C) 2012 Fernando José Iglesias García
9  */
10 
14 #include <shogun/structure/Plif.h>
15 
16 using namespace shogun;
17 
20 {
21  init();
22 }
23 
24 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs)
25 : CStructuredModel(features, labels)
26 {
27  init();
28  m_num_obs = num_obs;
29  m_num_plif_nodes = 20;
30  m_use_plifs = use_plifs;
31 
32  switch (smt)
33  {
34  case SMT_TWO_STATE:
35  m_state_model = new CTwoStateModel();
36  break;
37  case SMT_UNKNOWN:
38  default:
39  SG_ERROR("The EStateModelType given is not valid\n")
40  }
41 }
42 
44 {
45  SG_UNREF(m_state_model);
46  SG_UNREF(m_plif_matrix);
47 }
48 
49 int32_t CHMSVMModel::get_dim() const
50 {
51  // Shorthand for the number of free states
52  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
54  int32_t D = mf->get_num_features();
55 
56  if ( m_use_plifs )
57  return free_states*(free_states + D*m_num_plif_nodes);
58  else
59  return free_states*(free_states + D*m_num_obs);
60 }
61 
63  int32_t feat_idx,
64  CStructuredData* y)
65 {
66  // Shorthand for the number of features of the feature vector
68  int32_t D = mf->get_num_features();
69 
70  // Get the sequence of labels
72 
73  // Initialize psi
75  psi.zero();
76 
77  // Translate from labels sequence to state sequence
78  SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
79  m_transmission_weights.zero();
80 
81  for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
82  m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
83 
84  SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
85  REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen,
86  "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
87  obs.num_rows, D, obs.num_cols, state_seq.vlen)
88  m_emission_weights.zero();
89  index_t aux_idx, weight_idx;
90 
91  if ( !m_use_plifs ) // Do not use PLiFs
92  {
93  for ( int32_t f = 0 ; f < D ; ++f )
94  {
95  aux_idx = f*m_num_obs;
96 
97  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
98  {
99  weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
100  m_emission_weights[weight_idx] += 1;
101  }
102  }
103 
104  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
105  D, m_num_obs);
106  }
107  else // Use PLiFs
108  {
109  int32_t S = m_state_model->get_num_states();
110 
111  for ( int32_t f = 0 ; f < D ; ++f )
112  {
113  aux_idx = f*m_num_plif_nodes;
114 
115  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
116  {
117  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]);
118  SGVector<float64_t> limits = plif->get_plif_limits();
119  // The number of supporting points smaller or equal than value
120  int32_t count = 0;
121  // The observation value
122  float64_t value = obs(f,j);
123 
124  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
125  {
126  if ( limits[i] <= value )
127  ++count;
128  else
129  break;
130  }
131 
132  weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;
133 
134  if ( count == 0 )
135  m_emission_weights[weight_idx] += 1;
136  else if ( count == m_num_plif_nodes )
137  m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
138  else
139  {
140  m_emission_weights[weight_idx + count] +=
141  (value-limits[count-1]) / (limits[count]-limits[count-1]);
142 
143  m_emission_weights[weight_idx + count-1] +=
144  (limits[count]-value) / (limits[count]-limits[count-1]);
145  }
146 
147  SG_UNREF(plif);
148  }
149  }
150 
151  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
152  D, m_num_plif_nodes);
153  }
154 
155  return psi;
156 }
157 
160  int32_t feat_idx,
161  bool const training)
162 {
163  int32_t dim = get_dim();
164  ASSERT( w.vlen == get_dim() )
165 
166  // Shorthand for the number of features of the feature vector
168  int32_t D = mf->get_num_features();
169  // Shorthand for the number of states
170  int32_t S = m_state_model->get_num_states();
171 
172  if ( m_use_plifs )
173  {
174  REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
175  "the use_plifs option?\n");
176  REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the "
177  "feature dimension and/or number of states changed from training to prediction?\n");
178  }
179 
180  // Distribution of start states
181  SGVector< float64_t > p = m_state_model->get_start_states();
182  // Distribution of stop states
183  SGVector< float64_t > q = m_state_model->get_stop_states();
184 
185  // Compute the loss-augmented emission matrix:
186  // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
187 
188  SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
189 
190  int32_t T = x.num_cols;
191  SGMatrix< float64_t > E(S, T);
192  E.zero();
193 
194  if ( !m_use_plifs ) // Do not use PLiFs
195  {
196  index_t em_idx;
197  m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
198 
199  for ( int32_t i = 0 ; i < T ; ++i )
200  {
201  for ( int32_t j = 0 ; j < D ; ++j )
202  {
203  //FIXME make independent of observation values
204  em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
205 
206  for ( int32_t s = 0 ; s < S ; ++s )
207  E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
208  }
209  }
210  }
211  else // Use PLiFs
212  {
213  m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes);
214 
215  for ( int32_t i = 0 ; i < T ; ++i )
216  {
217  for ( int32_t f = 0 ; f < D ; ++f )
218  {
219  for ( int32_t s = 0 ; s < S ; ++s )
220  {
221  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s);
222  E(s,i) += plif->lookup( x(f,i) );
223  SG_UNREF(plif);
224  }
225  }
226  }
227  }
228 
229  // If argmax used while training, add to E the loss matrix (loss-augmented inference)
230  if ( training )
231  {
232  CSequence* ytrue =
234 
235  REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
236  "x^i (%d) and the length of its corresponding label y^i "
237  "(%d) must be the same.\n", T, ytrue->get_data().size());
238 
239  SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
240 
241  ASSERT(loss_matrix.num_rows == E.num_rows &&
242  loss_matrix.num_cols == E.num_cols);
243 
245  1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
246 
247  // Decrement the reference count corresponding to get_label above
248  SG_UNREF(ytrue);
249  }
250 
251  // Initialize the dynamic programming table and the traceback matrix
252  SGMatrix< float64_t > dp(T, S);
253  SGMatrix< float64_t > trb(T, S);
254  m_state_model->reshape_transmission_params(m_transmission_weights, w);
255 
256  for ( int32_t s = 0 ; s < S ; ++s )
257  {
258  if ( p[s] > -CMath::INFTY )
259  {
260  // dp(0,s) = E(s,0)
261  dp(0,s) = E[s];
262  }
263  else
264  {
265  dp(0,s) = -CMath::INFTY;
266  }
267  }
268 
269  // Viterbi algorithm
270  int32_t idx;
271  float64_t tmp_score, e, a;
272 
273  for ( int32_t i = 1 ; i < T ; ++i )
274  {
275  for ( int32_t cur = 0 ; cur < S ; ++cur )
276  {
277  idx = cur*T + i;
278 
279  dp[idx] = -CMath::INFTY;
280  trb[idx] = -1;
281 
282  // e = E(cur,i)
283  e = E[i*S + cur];
284 
285  for ( int32_t prev = 0 ; prev < S ; ++prev )
286  {
287  // aij = m_transmission_weights(prev, cur)
288  a = m_transmission_weights[cur*S + prev];
289 
290  if ( a > -CMath::INFTY )
291  {
292  // tmp_score = e + a + dp(i-1, prev)
293  tmp_score = e + a + dp[prev*T + i-1];
294 
295  if ( tmp_score > dp[idx] )
296  {
297  dp[idx] = tmp_score;
298  trb[idx] = prev;
299  }
300  }
301  }
302  }
303  }
304 
305  // Trace back the most likely sequence of states
306  SGVector< int32_t > opt_path(T);
307  CResultSet* ret = new CResultSet();
308  SG_REF(ret);
309  ret->score = -CMath::INFTY;
310  opt_path[T-1] = -1;
311 
312  for ( int32_t s = 0 ; s < S ; ++s )
313  {
314  idx = s*T + T-1;
315 
316  if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
317  {
318  ret->score = dp[idx];
319  opt_path[T-1] = s;
320  }
321  }
322 
323  REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
324  "Maybe the state model used cannot produce such sequence.\n"
325  "If using the TwoStateModel, please use sequences of length greater than two.\n");
326 
327  for ( int32_t i = T-1 ; i > 0 ; --i )
328  opt_path[i-1] = trb[opt_path[i]*T + i];
329 
330  // Populate the CResultSet object to return
331  CSequence* ypred = m_state_model->states_to_labels(opt_path);
332 
333  ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
334  ret->argmax = ypred;
335  if ( training )
336  {
337  ret->delta = CStructuredModel::delta_loss(feat_idx, ypred);
338  ret->psi_truth = CStructuredModel::get_joint_feature_vector(feat_idx, feat_idx);
340  }
341 
342  return ret;
343 }
344 
346 {
349 
350  // Compute the Hamming loss, number of distinct elements in the sequences
351  return m_state_model->loss(seq1, seq2);
352 }
353 
355  float64_t regularization,
363 {
364  // Shorthand for the number of free states (i.e. states for which parameters are learnt)
365  int32_t S = ((CSequenceLabels*) m_labels)->get_num_states();
366  // Shorthand for the number of features of the feature vector
367  int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
368 
369  // Monotonicity constraints for feature scoring functions
370  SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
371 
372  // Quadratic regularization
373 
374  float64_t C_small = regularization;
375  float64_t C_smooth = 0.02*regularization;
376  // TODO change the representation of C to sparse matrix
377  C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
378  C.zero();
379  for ( int32_t i = 0 ; i < get_dim() ; ++i )
380  C(i,i) = C_small;
381  for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
382  C(i,i) = C_smooth;
383 
384  // Smoothness constraints
385 
386  // For each auxiliary variable, there are two different constraints
387  // TODO change the representation of A to sparse matrix
388  A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
389  A.zero();
390 
391  // Indices to the beginning of the blocks of scores. Each block is
392  // formed by the scores of a pair (state, feature)
393  SGVector< int32_t > score_starts(S*D);
394  int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;
395  for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
396  score_starts[k] = idx;
397 
398  // Indices to the beginning of the blocks of variables for smoothness
399  SGVector< int32_t > aux_starts_smooth(S*D);
400  for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
401  aux_starts_smooth[k] = idx;
402 
403  // Bound the difference between adjacent score values from above and
404  // below by an auxiliary variable (which then is regularized
405  // quadratically)
406 
407  int32_t con_idx = 0, scr_idx, aux_idx;
408 
409  for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
410  {
411  scr_idx = score_starts[i];
412  aux_idx = aux_starts_smooth[i];
413 
414  for ( int32_t j = 0 ; j < delta-1 ; ++j )
415  {
416  A(con_idx, scr_idx) = 1;
417  A(con_idx, scr_idx+1) = -1;
418 
419  if ( monotonicity[i] != 1 )
420  A(con_idx, aux_idx) = -1;
421  ++con_idx;
422 
423  A(con_idx, scr_idx) = -1;
424  A(con_idx, scr_idx+1) = 1;
425 
426  if ( monotonicity[i] != -1 )
427  A(con_idx, aux_idx) = -1;
428  ++con_idx;
429 
430  ++scr_idx, ++aux_idx;
431  }
432  }
433 
434  // Bounds for the smoothness constraints
435  b = SGVector< float64_t >(2*m_num_aux);
436  b.zero();
437 }
438 
440 {
441  // Shorthand for the labels in the correct type
442  CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels;
443  // Frequency of each state
444  SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
445  state_freq.zero();
446 
447  CSequence* seq;
448  int32_t state;
449  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
450  {
451  seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
452 
453  SGVector<int32_t> seq_data = seq->get_data();
454  for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
455  {
456  state = seq_data[j];
457 
458  if ( state < 0 || state >= hmsvm_labels->get_num_states() )
459  {
460  SG_ERROR("Found state out of {0, 1, ..., "
461  "num_states-1}\n");
462  return false;
463  }
464  else
465  {
466  ++state_freq[state];
467  }
468  }
469 
470  // Decrement the reference count increased by get_label
471  SG_UNREF(seq);
472  }
473 
474  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
475  {
476  if ( state_freq[i] <= 0 )
477  {
478  SG_ERROR("What? State %d has never appeared\n", i)
479  return false;
480  }
481  }
482 
483  return true;
484 }
485 
486 void CHMSVMModel::init()
487 {
488  SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
489  SG_ADD(&m_transmission_weights, "m_transmission_weights",
490  "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
491  SG_ADD(&m_emission_weights, "m_emission_weights",
492  "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
493  SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF",
494  MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter
495  SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE);
496 
497  m_num_obs = 0;
498  m_num_aux = 0;
499  m_use_plifs = false;
500  m_state_model = NULL;
501  m_plif_matrix = NULL;
502  m_num_plif_nodes = 0;
503 }
504 
506 {
507  return m_num_aux;
508 }
509 
511 {
512  return 2*m_num_aux;
513 }
514 
515 void CHMSVMModel::set_use_plifs(bool use_plifs)
516 {
517  m_use_plifs = use_plifs;
518 }
519 
521 {
522  // Shorthands for the number of states, the matrix features and their dimension
523  int32_t S = m_state_model->get_num_states();
525  int32_t D = mf->get_num_features();
526 
527  // Transmission and emission weights allocation
528  m_transmission_weights = SGMatrix< float64_t >(S,S);
529  if ( m_use_plifs )
530  m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes);
531  else
532  m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
533 
534  // Auxiliary variables
535 
536  // Shorthand for the number of free states
537  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
538  if ( m_use_plifs )
539  m_num_aux = free_states*D*(m_num_plif_nodes-1);
540  else
541  m_num_aux = free_states*D*(m_num_obs-1);
542 
543  if ( m_use_plifs )
544  {
545  // Initialize PLiF matrix
546  m_plif_matrix = new CDynamicObjectArray(S*D);
547  SG_REF(m_plif_matrix);
548 
549  // Determine the x values for the supporting points of the PLiFs
550 
551  // Count the number of points per feature, using all the feature vectors
552  int32_t N = 0;
553  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
554  {
555  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
556  N += feat_vec.num_cols;
557  }
558 
559  // Choose the supporting points so that roughly the same number of points fall in each bin
560  SGVector< float64_t > a = SGVector< float64_t >::linspace_vec(1, N, m_num_plif_nodes+1);
561  SGVector< index_t > signal_idxs(m_num_plif_nodes);
562  for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
563  signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1;
564 
565  SGVector< float64_t > signal(N);
566  index_t idx; // used to populate signal
567  for ( int32_t f = 0 ; f < D ; ++f )
568  {
569  // Get the points of feature f of all the feature vectors
570  idx = 0;
571  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
572  {
573  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
574  for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
575  signal[idx++] = feat_vec(f,j);
576  }
577 
578  signal.qsort();
579  SGVector< float64_t > limits(m_num_plif_nodes);
580  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
581  limits[i] = signal[ signal_idxs[i] ];
582 
583  // Set the PLiFs' supporting points
584  for ( int32_t s = 0 ; s < S ; ++s )
585  {
586  CPlif* plif = new CPlif(m_num_plif_nodes);
587  plif->set_plif_limits(limits);
588  plif->set_min_value(-CMath::INFTY);
590  m_plif_matrix->push_back(plif);
591  }
592  }
593  }
594 }
595 
597 {
598  return m_transmission_weights;
599 }
600 
602 {
603  return m_emission_weights;
604 }
605 
607 {
608  SG_REF(m_state_model);
609  return m_state_model;
610 }

SHOGUN Machine Learning Toolbox - Documentation