SHOGUN  4.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMSVMModel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Fernando José Iglesias García
8  * Copyright (C) 2012 Fernando José Iglesias García
9  */
10 
15 #include <shogun/structure/Plif.h>
17 
18 using namespace shogun;
19 
22 {
23  init();
24 }
25 
26 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs)
27 : CStructuredModel(features, labels)
28 {
29  init();
30  m_num_obs = num_obs;
31  m_num_plif_nodes = 20;
32  m_use_plifs = use_plifs;
33 
34  switch (smt)
35  {
36  case SMT_TWO_STATE:
37  m_state_model = new CTwoStateModel();
38  break;
39  case SMT_UNKNOWN:
40  default:
41  SG_ERROR("The EStateModelType given is not valid\n")
42  }
43 }
44 
46 {
47  SG_UNREF(m_state_model);
48  SG_UNREF(m_plif_matrix);
49 }
50 
51 int32_t CHMSVMModel::get_dim() const
52 {
53  // Shorthand for the number of free states
54  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
56  int32_t D = mf->get_num_features();
57 
58  if ( m_use_plifs )
59  return free_states*(free_states + D*m_num_plif_nodes);
60  else
61  return free_states*(free_states + D*m_num_obs);
62 }
63 
65  int32_t feat_idx,
66  CStructuredData* y)
67 {
68  // Shorthand for the number of features of the feature vector
70  int32_t D = mf->get_num_features();
71 
72  // Get the sequence of labels
74 
75  // Initialize psi
77  psi.zero();
78 
79  // Translate from labels sequence to state sequence
80  SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
81  m_transmission_weights.zero();
82 
83  for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
84  m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
85 
86  SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
87  REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen,
88  "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
89  obs.num_rows, D, obs.num_cols, state_seq.vlen)
90  m_emission_weights.zero();
91  index_t aux_idx, weight_idx;
92 
93  if ( !m_use_plifs ) // Do not use PLiFs
94  {
95  for ( int32_t f = 0 ; f < D ; ++f )
96  {
97  aux_idx = f*m_num_obs;
98 
99  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
100  {
101  weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
102  m_emission_weights[weight_idx] += 1;
103  }
104  }
105 
106  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
107  D, m_num_obs);
108  }
109  else // Use PLiFs
110  {
111  int32_t S = m_state_model->get_num_states();
112 
113  for ( int32_t f = 0 ; f < D ; ++f )
114  {
115  aux_idx = f*m_num_plif_nodes;
116 
117  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
118  {
119  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]);
120  SGVector<float64_t> limits = plif->get_plif_limits();
121  // The number of supporting points smaller or equal than value
122  int32_t count = 0;
123  // The observation value
124  float64_t value = obs(f,j);
125 
126  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
127  {
128  if ( limits[i] <= value )
129  ++count;
130  else
131  break;
132  }
133 
134  weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;
135 
136  if ( count == 0 )
137  m_emission_weights[weight_idx] += 1;
138  else if ( count == m_num_plif_nodes )
139  m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
140  else
141  {
142  m_emission_weights[weight_idx + count] +=
143  (value-limits[count-1]) / (limits[count]-limits[count-1]);
144 
145  m_emission_weights[weight_idx + count-1] +=
146  (limits[count]-value) / (limits[count]-limits[count-1]);
147  }
148 
149  SG_UNREF(plif);
150  }
151  }
152 
153  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
154  D, m_num_plif_nodes);
155  }
156 
157  return psi;
158 }
159 
162  int32_t feat_idx,
163  bool const training)
164 {
165  int32_t dim = get_dim();
166  ASSERT( w.vlen == get_dim() )
167 
168  // Shorthand for the number of features of the feature vector
170  int32_t D = mf->get_num_features();
171  // Shorthand for the number of states
172  int32_t S = m_state_model->get_num_states();
173 
174  if ( m_use_plifs )
175  {
176  REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
177  "the use_plifs option?\n");
178  REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the "
179  "feature dimension and/or number of states changed from training to prediction?\n");
180  }
181 
182  // Distribution of start states
183  SGVector< float64_t > p = m_state_model->get_start_states();
184  // Distribution of stop states
185  SGVector< float64_t > q = m_state_model->get_stop_states();
186 
187  // Compute the loss-augmented emission matrix:
188  // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
189 
190  SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
191 
192  int32_t T = x.num_cols;
193  SGMatrix< float64_t > E(S, T);
194  E.zero();
195 
196  if ( !m_use_plifs ) // Do not use PLiFs
197  {
198  index_t em_idx;
199  m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
200 
201  for ( int32_t i = 0 ; i < T ; ++i )
202  {
203  for ( int32_t j = 0 ; j < D ; ++j )
204  {
205  //FIXME make independent of observation values
206  em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
207 
208  for ( int32_t s = 0 ; s < S ; ++s )
209  E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
210  }
211  }
212  }
213  else // Use PLiFs
214  {
215  m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes);
216 
217  for ( int32_t i = 0 ; i < T ; ++i )
218  {
219  for ( int32_t f = 0 ; f < D ; ++f )
220  {
221  for ( int32_t s = 0 ; s < S ; ++s )
222  {
223  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s);
224  E(s,i) += plif->lookup( x(f,i) );
225  SG_UNREF(plif);
226  }
227  }
228  }
229  }
230 
231  // If argmax used while training, add to E the loss matrix (loss-augmented inference)
232  if ( training )
233  {
234  CSequence* ytrue =
236 
237  REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
238  "x^i (%d) and the length of its corresponding label y^i "
239  "(%d) must be the same.\n", T, ytrue->get_data().size());
240 
241  SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
242 
243  ASSERT(loss_matrix.num_rows == E.num_rows &&
244  loss_matrix.num_cols == E.num_cols);
245 
247  1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
248 
249  // Decrement the reference count corresponding to get_label above
250  SG_UNREF(ytrue);
251  }
252 
253  // Initialize the dynamic programming table and the traceback matrix
254  SGMatrix< float64_t > dp(T, S);
255  SGMatrix< float64_t > trb(T, S);
256  m_state_model->reshape_transmission_params(m_transmission_weights, w);
257 
258  for ( int32_t s = 0 ; s < S ; ++s )
259  {
260  if ( p[s] > -CMath::INFTY )
261  {
262  // dp(0,s) = E(s,0)
263  dp(0,s) = E[s];
264  }
265  else
266  {
267  dp(0,s) = -CMath::INFTY;
268  }
269  }
270 
271  // Viterbi algorithm
272  int32_t idx;
273  float64_t tmp_score, e, a;
274 
275  for ( int32_t i = 1 ; i < T ; ++i )
276  {
277  for ( int32_t cur = 0 ; cur < S ; ++cur )
278  {
279  idx = cur*T + i;
280 
281  dp[idx] = -CMath::INFTY;
282  trb[idx] = -1;
283 
284  // e = E(cur,i)
285  e = E[i*S + cur];
286 
287  for ( int32_t prev = 0 ; prev < S ; ++prev )
288  {
289  // aij = m_transmission_weights(prev, cur)
290  a = m_transmission_weights[cur*S + prev];
291 
292  if ( a > -CMath::INFTY )
293  {
294  // tmp_score = e + a + dp(i-1, prev)
295  tmp_score = e + a + dp[prev*T + i-1];
296 
297  if ( tmp_score > dp[idx] )
298  {
299  dp[idx] = tmp_score;
300  trb[idx] = prev;
301  }
302  }
303  }
304  }
305  }
306 
307  // Trace back the most likely sequence of states
308  SGVector< int32_t > opt_path(T);
309  CResultSet* ret = new CResultSet();
310  SG_REF(ret);
311  ret->psi_computed = true;
312  ret->score = -CMath::INFTY;
313  opt_path[T-1] = -1;
314 
315  for ( int32_t s = 0 ; s < S ; ++s )
316  {
317  idx = s*T + T-1;
318 
319  if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
320  {
321  ret->score = dp[idx];
322  opt_path[T-1] = s;
323  }
324  }
325 
326  REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
327  "Maybe the state model used cannot produce such sequence.\n"
328  "If using the TwoStateModel, please use sequences of length greater than two.\n");
329 
330  for ( int32_t i = T-1 ; i > 0 ; --i )
331  opt_path[i-1] = trb[opt_path[i]*T + i];
332 
333  // Populate the CResultSet object to return
334  CSequence* ypred = m_state_model->states_to_labels(opt_path);
335 
336  ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
337  ret->argmax = ypred;
338  if ( training )
339  {
340  ret->delta = CStructuredModel::delta_loss(feat_idx, ypred);
341  ret->psi_truth = CStructuredModel::get_joint_feature_vector(feat_idx, feat_idx);
342  ret->score -= CMath::dot(w.vector, ret->psi_truth.vector, dim);
343  }
344 
345  return ret;
346 }
347 
349 {
352 
353  // Compute the Hamming loss, number of distinct elements in the sequences
354  return m_state_model->loss(seq1, seq2);
355 }
356 
358  float64_t regularization,
366 {
367  // Shorthand for the number of free states (i.e. states for which parameters are learnt)
368  int32_t S = ((CSequenceLabels*) m_labels)->get_num_states();
369  // Shorthand for the number of features of the feature vector
370  int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
371 
372  // Monotonicity constraints for feature scoring functions
373  SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
374 
375  // Quadratic regularization
376 
377  float64_t C_small = regularization;
378  float64_t C_smooth = 0.02*regularization;
379  // TODO change the representation of C to sparse matrix
380  C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
381  C.zero();
382  for ( int32_t i = 0 ; i < get_dim() ; ++i )
383  C(i,i) = C_small;
384  for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
385  C(i,i) = C_smooth;
386 
387  // Smoothness constraints
388 
389  // For each auxiliary variable, there are two different constraints
390  // TODO change the representation of A to sparse matrix
391  A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
392  A.zero();
393 
394  // Indices to the beginning of the blocks of scores. Each block is
395  // formed by the scores of a pair (state, feature)
396  SGVector< int32_t > score_starts(S*D);
397  int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;
398  for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
399  score_starts[k] = idx;
400 
401  // Indices to the beginning of the blocks of variables for smoothness
402  SGVector< int32_t > aux_starts_smooth(S*D);
403  for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
404  aux_starts_smooth[k] = idx;
405 
406  // Bound the difference between adjacent score values from above and
407  // below by an auxiliary variable (which then is regularized
408  // quadratically)
409 
410  int32_t con_idx = 0, scr_idx, aux_idx;
411 
412  for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
413  {
414  scr_idx = score_starts[i];
415  aux_idx = aux_starts_smooth[i];
416 
417  for ( int32_t j = 0 ; j < delta-1 ; ++j )
418  {
419  A(con_idx, scr_idx) = 1;
420  A(con_idx, scr_idx+1) = -1;
421 
422  if ( monotonicity[i] != 1 )
423  A(con_idx, aux_idx) = -1;
424  ++con_idx;
425 
426  A(con_idx, scr_idx) = -1;
427  A(con_idx, scr_idx+1) = 1;
428 
429  if ( monotonicity[i] != -1 )
430  A(con_idx, aux_idx) = -1;
431  ++con_idx;
432 
433  ++scr_idx, ++aux_idx;
434  }
435  }
436 
437  // Bounds for the smoothness constraints
438  b = SGVector< float64_t >(2*m_num_aux);
439  b.zero();
440 }
441 
443 {
444  // Shorthand for the labels in the correct type
445  CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels;
446  // Frequency of each state
447  SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
448  state_freq.zero();
449 
450  CSequence* seq;
451  int32_t state;
452  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
453  {
454  seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
455 
456  SGVector<int32_t> seq_data = seq->get_data();
457  for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
458  {
459  state = seq_data[j];
460 
461  if ( state < 0 || state >= hmsvm_labels->get_num_states() )
462  {
463  SG_ERROR("Found state out of {0, 1, ..., "
464  "num_states-1}\n");
465  return false;
466  }
467  else
468  {
469  ++state_freq[state];
470  }
471  }
472 
473  // Decrement the reference count increased by get_label
474  SG_UNREF(seq);
475  }
476 
477  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
478  {
479  if ( state_freq[i] <= 0 )
480  {
481  SG_ERROR("What? State %d has never appeared\n", i)
482  return false;
483  }
484  }
485 
486  return true;
487 }
488 
489 void CHMSVMModel::init()
490 {
491  SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
492  SG_ADD(&m_transmission_weights, "m_transmission_weights",
493  "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
494  SG_ADD(&m_emission_weights, "m_emission_weights",
495  "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
496  SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF",
497  MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter
498  SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE);
499 
500  m_num_obs = 0;
501  m_num_aux = 0;
502  m_use_plifs = false;
503  m_state_model = NULL;
504  m_plif_matrix = NULL;
505  m_num_plif_nodes = 0;
506 }
507 
509 {
510  return m_num_aux;
511 }
512 
514 {
515  return 2*m_num_aux;
516 }
517 
518 void CHMSVMModel::set_use_plifs(bool use_plifs)
519 {
520  m_use_plifs = use_plifs;
521 }
522 
524 {
525  // Shorthands for the number of states, the matrix features and their dimension
526  int32_t S = m_state_model->get_num_states();
528  int32_t D = mf->get_num_features();
529 
530  // Transmission and emission weights allocation
531  m_transmission_weights = SGMatrix< float64_t >(S,S);
532  if ( m_use_plifs )
533  m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes);
534  else
535  m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
536 
537  // Auxiliary variables
538 
539  // Shorthand for the number of free states
540  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
541  if ( m_use_plifs )
542  m_num_aux = free_states*D*(m_num_plif_nodes-1);
543  else
544  m_num_aux = free_states*D*(m_num_obs-1);
545 
546  if ( m_use_plifs )
547  {
548  // Initialize PLiF matrix
549  m_plif_matrix = new CDynamicObjectArray(S*D);
550  SG_REF(m_plif_matrix);
551 
552  // Determine the x values for the supporting points of the PLiFs
553 
554  // Count the number of points per feature, using all the feature vectors
555  int32_t N = 0;
556  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
557  {
558  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
559  N += feat_vec.num_cols;
560  }
561 
562  // Choose the supporting points so that roughly the same number of points fall in each bin
563  SGVector< float64_t > a = CMath::linspace_vec(1, N, m_num_plif_nodes+1);
564  SGVector< index_t > signal_idxs(m_num_plif_nodes);
565  for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
566  signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1;
567 
568  SGVector< float64_t > signal(N);
569  index_t idx; // used to populate signal
570  for ( int32_t f = 0 ; f < D ; ++f )
571  {
572  // Get the points of feature f of all the feature vectors
573  idx = 0;
574  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
575  {
576  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
577  for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
578  signal[idx++] = feat_vec(f,j);
579  }
580 
581  CMath::qsort(signal);
582  SGVector< float64_t > limits(m_num_plif_nodes);
583  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
584  limits[i] = signal[ signal_idxs[i] ];
585 
586  // Set the PLiFs' supporting points
587  for ( int32_t s = 0 ; s < S ; ++s )
588  {
589  CPlif* plif = new CPlif(m_num_plif_nodes);
590  plif->set_plif_limits(limits);
591  plif->set_min_value(-CMath::INFTY);
593  m_plif_matrix->push_back(plif);
594  }
595  }
596  }
597 }
598 
600 {
601  return m_transmission_weights;
602 }
603 
605 {
606  return m_emission_weights;
607 }
608 
610 {
611  SG_REF(m_state_model);
612  return m_state_model;
613 }

SHOGUN Machine Learning Toolbox - Documentation