SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMSVMModel.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2012 Fernando José Iglesias García
8  * Copyright (C) 2012 Fernando José Iglesias García
9  */
10 
14 #include <shogun/structure/Plif.h>
15 
16 using namespace shogun;
17 
20 {
21  init();
22 }
23 
24 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs, bool use_plifs)
25 : CStructuredModel(features, labels)
26 {
27  init();
28  m_num_obs = num_obs;
29  m_num_plif_nodes = 20;
30  m_use_plifs = use_plifs;
31 
32  switch (smt)
33  {
34  case SMT_TWO_STATE:
35  m_state_model = new CTwoStateModel();
36  break;
37  case SMT_UNKNOWN:
38  default:
39  SG_ERROR("The EStateModelType given is not valid\n")
40  }
41 }
42 
44 {
45  SG_UNREF(m_state_model);
46  SG_UNREF(m_plif_matrix);
47 }
48 
49 int32_t CHMSVMModel::get_dim() const
50 {
51  // Shorthand for the number of free states
52  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
54  int32_t D = mf->get_num_features();
55 
56  if ( m_use_plifs )
57  return free_states*(free_states + D*m_num_plif_nodes);
58  else
59  return free_states*(free_states + D*m_num_obs);
60 }
61 
63  int32_t feat_idx,
64  CStructuredData* y)
65 {
66  // Shorthand for the number of features of the feature vector
68  int32_t D = mf->get_num_features();
69 
70  // Get the sequence of labels
72 
73  // Initialize psi
75  psi.zero();
76 
77  // Translate from labels sequence to state sequence
78  SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
79  m_transmission_weights.zero();
80 
81  for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
82  m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
83 
84  SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
85  REQUIRE(obs.num_rows == D && obs.num_cols == state_seq.vlen,
86  "obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
87  obs.num_rows, D, obs.num_cols, state_seq.vlen)
88  m_emission_weights.zero();
89  index_t aux_idx, weight_idx;
90 
91  if ( !m_use_plifs ) // Do not use PLiFs
92  {
93  for ( int32_t f = 0 ; f < D ; ++f )
94  {
95  aux_idx = f*m_num_obs;
96 
97  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
98  {
99  weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
100  m_emission_weights[weight_idx] += 1;
101  }
102  }
103 
104  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
105  D, m_num_obs);
106  }
107  else // Use PLiFs
108  {
109  int32_t S = m_state_model->get_num_states();
110 
111  for ( int32_t f = 0 ; f < D ; ++f )
112  {
113  aux_idx = f*m_num_plif_nodes;
114 
115  for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
116  {
117  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + state_seq[j]);
118  SGVector<float64_t> limits = plif->get_plif_limits();
119  // The number of supporting points smaller or equal than value
120  int32_t count = 0;
121  // The observation value
122  float64_t value = obs(f,j);
123 
124  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
125  {
126  if ( limits[i] <= value )
127  ++count;
128  else
129  break;
130  }
131 
132  weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;
133 
134  if ( count == 0 )
135  m_emission_weights[weight_idx] += 1;
136  else if ( count == m_num_plif_nodes )
137  m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
138  else
139  {
140  m_emission_weights[weight_idx + count] +=
141  (value-limits[count-1]) / (limits[count]-limits[count-1]);
142 
143  m_emission_weights[weight_idx + count-1] +=
144  (limits[count]-value) / (limits[count]-limits[count-1]);
145  }
146 
147  SG_UNREF(plif);
148  }
149  }
150 
151  m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
152  D, m_num_plif_nodes);
153  }
154 
155  return psi;
156 }
157 
160  int32_t feat_idx,
161  bool const training)
162 {
163  int32_t dim = get_dim();
164  ASSERT( w.vlen == get_dim() )
165 
166  // Shorthand for the number of features of the feature vector
168  int32_t D = mf->get_num_features();
169  // Shorthand for the number of states
170  int32_t S = m_state_model->get_num_states();
171 
172  if ( m_use_plifs )
173  {
174  REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
175  "the use_plifs option?\n");
176  REQUIRE(m_plif_matrix->get_num_elements() == S*D, "Dimension mismatch in PLiF matrix, have the "
177  "feature dimension and/or number of states changed from training to prediction?\n");
178  }
179 
180  // Distribution of start states
181  SGVector< float64_t > p = m_state_model->get_start_states();
182  // Distribution of stop states
183  SGVector< float64_t > q = m_state_model->get_stop_states();
184 
185  // Compute the loss-augmented emission matrix:
186  // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
187 
188  SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
189 
190  int32_t T = x.num_cols;
191  SGMatrix< float64_t > E(S, T);
192  E.zero();
193 
194  if ( !m_use_plifs ) // Do not use PLiFs
195  {
196  index_t em_idx;
197  m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
198 
199  for ( int32_t i = 0 ; i < T ; ++i )
200  {
201  for ( int32_t j = 0 ; j < D ; ++j )
202  {
203  //FIXME make independent of observation values
204  em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
205 
206  for ( int32_t s = 0 ; s < S ; ++s )
207  E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
208  }
209  }
210  }
211  else // Use PLiFs
212  {
213  m_state_model->reshape_emission_params(m_plif_matrix, w, D, m_num_plif_nodes);
214 
215  for ( int32_t i = 0 ; i < T ; ++i )
216  {
217  for ( int32_t f = 0 ; f < D ; ++f )
218  {
219  for ( int32_t s = 0 ; s < S ; ++s )
220  {
221  CPlif* plif = (CPlif*) m_plif_matrix->get_element(S*f + s);
222  E(s,i) += plif->lookup( x(f,i) );
223  SG_UNREF(plif);
224  }
225  }
226  }
227  }
228 
229  // If argmax used while training, add to E the loss matrix (loss-augmented inference)
230  if ( training )
231  {
232  CSequence* ytrue =
234 
235  REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
236  "x^i (%d) and the length of its corresponding label y^i "
237  "(%d) must be the same.\n", T, ytrue->get_data().size());
238 
239  SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
240 
241  ASSERT(loss_matrix.num_rows == E.num_rows &&
242  loss_matrix.num_cols == E.num_cols);
243 
245  1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
246 
247  // Decrement the reference count corresponding to get_label above
248  SG_UNREF(ytrue);
249  }
250 
251  // Initialize the dynamic programming table and the traceback matrix
252  SGMatrix< float64_t > dp(T, S);
253  SGMatrix< float64_t > trb(T, S);
254  m_state_model->reshape_transmission_params(m_transmission_weights, w);
255 
256  for ( int32_t s = 0 ; s < S ; ++s )
257  {
258  if ( p[s] > -CMath::INFTY )
259  {
260  // dp(0,s) = E(s,0)
261  dp(0,s) = E[s];
262  }
263  else
264  {
265  dp(0,s) = -CMath::INFTY;
266  }
267  }
268 
269  // Viterbi algorithm
270  int32_t idx;
271  float64_t tmp_score, e, a;
272 
273  for ( int32_t i = 1 ; i < T ; ++i )
274  {
275  for ( int32_t cur = 0 ; cur < S ; ++cur )
276  {
277  idx = cur*T + i;
278 
279  dp[idx] = -CMath::INFTY;
280  trb[idx] = -1;
281 
282  // e = E(cur,i)
283  e = E[i*S + cur];
284 
285  for ( int32_t prev = 0 ; prev < S ; ++prev )
286  {
287  // aij = m_transmission_weights(prev, cur)
288  a = m_transmission_weights[cur*S + prev];
289 
290  if ( a > -CMath::INFTY )
291  {
292  // tmp_score = e + a + dp(i-1, prev)
293  tmp_score = e + a + dp[prev*T + i-1];
294 
295  if ( tmp_score > dp[idx] )
296  {
297  dp[idx] = tmp_score;
298  trb[idx] = prev;
299  }
300  }
301  }
302  }
303  }
304 
305  // Trace back the most likely sequence of states
306  SGVector< int32_t > opt_path(T);
307  CResultSet* ret = new CResultSet();
308  SG_REF(ret);
309  ret->score = -CMath::INFTY;
310  opt_path[T-1] = -1;
311 
312  for ( int32_t s = 0 ; s < S ; ++s )
313  {
314  idx = s*T + T-1;
315 
316  if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
317  {
318  ret->score = dp[idx];
319  opt_path[T-1] = s;
320  }
321  }
322 
323  REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
324  "Maybe the state model used cannot produce such sequence.\n"
325  "If using the TwoStateModel, please use sequences of length greater than two.\n");
326 
327  for ( int32_t i = T-1 ; i > 0 ; --i )
328  opt_path[i-1] = trb[opt_path[i]*T + i];
329 
330  // Populate the CResultSet object to return
331  CSequence* ypred = m_state_model->states_to_labels(opt_path);
332 
333  ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
334  ret->argmax = ypred;
335  if ( training )
336  {
337  ret->delta = CStructuredModel::delta_loss(feat_idx, ypred);
339  feat_idx, feat_idx);
341  ret->psi_truth.vector, dim);
342  }
343 
344  return ret;
345 }
346 
348 {
351 
352  // Compute the Hamming loss, number of distinct elements in the sequences
353  return m_state_model->loss(seq1, seq2);
354 }
355 
357  float64_t regularization,
365 {
366  // Shorthand for the number of free states (i.e. states for which parameters are learnt)
367  int32_t S = ((CSequenceLabels*) m_labels)->get_num_states();
368  // Shorthand for the number of features of the feature vector
369  int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
370 
371  // Monotonicity constraints for feature scoring functions
372  SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
373 
374  // Quadratic regularization
375 
376  float64_t C_small = regularization;
377  float64_t C_smooth = 0.02*regularization;
378  // TODO change the representation of C to sparse matrix
379  C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
380  C.zero();
381  for ( int32_t i = 0 ; i < get_dim() ; ++i )
382  C(i,i) = C_small;
383  for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
384  C(i,i) = C_smooth;
385 
386  // Smoothness constraints
387 
388  // For each auxiliary variable, there are two different constraints
389  // TODO change the representation of A to sparse matrix
390  A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
391  A.zero();
392 
393  // Indices to the beginning of the blocks of scores. Each block is
394  // formed by the scores of a pair (state, feature)
395  SGVector< int32_t > score_starts(S*D);
396  int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;
397  for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
398  score_starts[k] = idx;
399 
400  // Indices to the beginning of the blocks of variables for smoothness
401  SGVector< int32_t > aux_starts_smooth(S*D);
402  for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
403  aux_starts_smooth[k] = idx;
404 
405  // Bound the difference between adjacent score values from above and
406  // below by an auxiliary variable (which then is regularized
407  // quadratically)
408 
409  int32_t con_idx = 0, scr_idx, aux_idx;
410 
411  for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
412  {
413  scr_idx = score_starts[i];
414  aux_idx = aux_starts_smooth[i];
415 
416  for ( int32_t j = 0 ; j < delta-1 ; ++j )
417  {
418  A(con_idx, scr_idx) = 1;
419  A(con_idx, scr_idx+1) = -1;
420 
421  if ( monotonicity[i] != 1 )
422  A(con_idx, aux_idx) = -1;
423  ++con_idx;
424 
425  A(con_idx, scr_idx) = -1;
426  A(con_idx, scr_idx+1) = 1;
427 
428  if ( monotonicity[i] != -1 )
429  A(con_idx, aux_idx) = -1;
430  ++con_idx;
431 
432  ++scr_idx, ++aux_idx;
433  }
434  }
435 
436  // Bounds for the smoothness constraints
437  b = SGVector< float64_t >(2*m_num_aux);
438  b.zero();
439 }
440 
442 {
443  // Shorthand for the labels in the correct type
444  CSequenceLabels* hmsvm_labels = (CSequenceLabels*) m_labels;
445  // Frequency of each state
446  SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
447  state_freq.zero();
448 
449  CSequence* seq;
450  int32_t state;
451  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
452  {
453  seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
454 
455  SGVector<int32_t> seq_data = seq->get_data();
456  for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
457  {
458  state = seq_data[j];
459 
460  if ( state < 0 || state >= hmsvm_labels->get_num_states() )
461  {
462  SG_ERROR("Found state out of {0, 1, ..., "
463  "num_states-1}\n");
464  return false;
465  }
466  else
467  {
468  ++state_freq[state];
469  }
470  }
471 
472  // Decrement the reference count increased by get_label
473  SG_UNREF(seq);
474  }
475 
476  for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
477  {
478  if ( state_freq[i] <= 0 )
479  {
480  SG_ERROR("What? State %d has never appeared\n", i)
481  return false;
482  }
483  }
484 
485  return true;
486 }
487 
488 void CHMSVMModel::init()
489 {
490  SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
491  SG_ADD(&m_transmission_weights, "m_transmission_weights",
492  "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
493  SG_ADD(&m_emission_weights, "m_emission_weights",
494  "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
495  SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF",
496  MS_NOT_AVAILABLE); // FIXME It would actually make sense to do MS for this parameter
497  SG_ADD(&m_use_plifs, "m_use_plifs", "Whether to use plifs", MS_NOT_AVAILABLE);
498 
499  m_num_obs = 0;
500  m_num_aux = 0;
501  m_use_plifs = false;
502  m_state_model = NULL;
503  m_plif_matrix = NULL;
504  m_num_plif_nodes = 0;
505 }
506 
508 {
509  return m_num_aux;
510 }
511 
513 {
514  return 2*m_num_aux;
515 }
516 
517 void CHMSVMModel::set_use_plifs(bool use_plifs)
518 {
519  m_use_plifs = use_plifs;
520 }
521 
523 {
524  // Shorthands for the number of states, the matrix features and their dimension
525  int32_t S = m_state_model->get_num_states();
527  int32_t D = mf->get_num_features();
528 
529  // Transmission and emission weights allocation
530  m_transmission_weights = SGMatrix< float64_t >(S,S);
531  if ( m_use_plifs )
532  m_emission_weights = SGVector< float64_t >(S*D*m_num_plif_nodes);
533  else
534  m_emission_weights = SGVector< float64_t >(S*D*m_num_obs);
535 
536  // Auxiliary variables
537 
538  // Shorthand for the number of free states
539  int32_t free_states = ((CSequenceLabels*) m_labels)->get_num_states();
540  if ( m_use_plifs )
541  m_num_aux = free_states*D*(m_num_plif_nodes-1);
542  else
543  m_num_aux = free_states*D*(m_num_obs-1);
544 
545  if ( m_use_plifs )
546  {
547  // Initialize PLiF matrix
548  m_plif_matrix = new CDynamicObjectArray(S*D);
549  SG_REF(m_plif_matrix);
550 
551  // Determine the x values for the supporting points of the PLiFs
552 
553  // Count the number of points per feature, using all the feature vectors
554  int32_t N = 0;
555  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
556  {
557  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
558  N += feat_vec.num_cols;
559  }
560 
561  // Choose the supporting points so that roughly the same number of points fall in each bin
562  SGVector< float64_t > a = SGVector< float64_t >::linspace_vec(1, N, m_num_plif_nodes+1);
563  SGVector< index_t > signal_idxs(m_num_plif_nodes);
564  for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
565  signal_idxs[i] = (index_t) CMath::round( (a[i] + a[i+1]) / 2 ) - 1;
566 
567  SGVector< float64_t > signal(N);
568  index_t idx; // used to populate signal
569  for ( int32_t f = 0 ; f < D ; ++f )
570  {
571  // Get the points of feature f of all the feature vectors
572  idx = 0;
573  for ( int32_t i = 0 ; i < mf->get_num_vectors() ; ++i )
574  {
575  SGMatrix<float64_t> feat_vec = mf->get_feature_vector(i);
576  for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
577  signal[idx++] = feat_vec(f,j);
578  }
579 
580  signal.qsort();
581  SGVector< float64_t > limits(m_num_plif_nodes);
582  for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
583  limits[i] = signal[ signal_idxs[i] ];
584 
585  // Set the PLiFs' supporting points
586  for ( int32_t s = 0 ; s < S ; ++s )
587  {
588  CPlif* plif = new CPlif(m_num_plif_nodes);
589  plif->set_plif_limits(limits);
590  plif->set_min_value(-CMath::INFTY);
592  m_plif_matrix->push_back(plif);
593  }
594  }
595  }
596 }
597 
599 {
600  return m_transmission_weights;
601 }
602 
604 {
605  return m_emission_weights;
606 }
607 
609 {
610  SG_REF(m_state_model);
611  return m_state_model;
612 }

SHOGUN Machine Learning Toolbox - Documentation