HMSVMModel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Fernando José Iglesias García
00008  * Copyright (C) 2012 Fernando José Iglesias García
00009  */
00010 
00011 #include <shogun/structure/HMSVMModel.h>
00012 #include <shogun/features/MatrixFeatures.h>
00013 #include <shogun/structure/TwoStateModel.h>
00014 
00015 using namespace shogun;
00016 
00017 CHMSVMModel::CHMSVMModel()
00018 : CStructuredModel()
00019 {
00020     init();
00021 }
00022 
00023 CHMSVMModel::CHMSVMModel(CFeatures* features, CStructuredLabels* labels, EStateModelType smt, int32_t num_obs)
00024 : CStructuredModel(features, labels)
00025 {
00026     init();
00027 
00028     m_num_obs = num_obs;
00029     // Shorthand for the number of free states
00030     int32_t free_states = ((CHMSVMLabels*) m_labels)->get_num_states();
00031     // Shorthand for the number of features of the feature vector
00032     int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
00033     m_num_aux = free_states*D*(num_obs-1);
00034 
00035     switch (smt)
00036     {
00037         case SMT_TWO_STATE:
00038             m_state_model = new CTwoStateModel();
00039             break;
00040         case SMT_UNKNOWN:
00041         default:
00042             SG_ERROR("The EStateModelType given is not valid\n");
00043     }
00044 
00045     int32_t S = m_state_model->get_num_states();
00046     m_transmission_weights = SGMatrix< float64_t >(S,S);
00047     m_emission_weights     = SGVector< float64_t >(S*D*m_num_obs);
00048 }
00049 
00050 CHMSVMModel::~CHMSVMModel()
00051 {
00052     SG_UNREF(m_state_model);
00053 }
00054 
00055 int32_t CHMSVMModel::get_dim() const
00056 {
00057     // Shorthand for the number of states
00058     int32_t S = ((CHMSVMLabels*) m_labels)->get_num_states();
00059     // Shorthand for the number of features of the feature vector
00060     int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
00061 
00062     return S*(S + D*m_num_obs);
00063 }
00064 
00065 SGVector< float64_t > CHMSVMModel::get_joint_feature_vector(
00066         int32_t feat_idx,
00067         CStructuredData* y)
00068 {
00069     // Shorthand for the number of features of the feature vector
00070     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00071     int32_t D = mf->get_num_features();
00072 
00073     // Get the sequence of labels
00074     CSequence* label_seq = CSequence::obtain_from_generic(y);
00075 
00076     // Initialize psi
00077     SGVector< float64_t > psi(get_dim());
00078     psi.zero();
00079 
00080     // Translate from labels sequence to state sequence
00081     SGVector< int32_t > state_seq = m_state_model->labels_to_states(label_seq);
00082     m_transmission_weights.zero();
00083 
00084     for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
00085         m_transmission_weights(state_seq[i],state_seq[i+1]) += 1;
00086 
00087     SGMatrix< float64_t > obs = mf->get_feature_vector(feat_idx);
00088     ASSERT(obs.num_rows == D && obs.num_cols == state_seq.vlen);
00089     m_emission_weights.zero();
00090     index_t aux_idx, weight_idx;
00091 
00092     for ( int32_t f = 0 ; f < D ; ++f )
00093     {
00094         aux_idx = f*m_num_obs;
00095 
00096         for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
00097         {
00098             weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
00099             m_emission_weights[weight_idx] += 1;
00100         }
00101     }
00102 
00103     m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
00104             D, m_num_obs);
00105 
00106     return psi;
00107 }
00108 
00109 CResultSet* CHMSVMModel::argmax(
00110         SGVector< float64_t > w,
00111         int32_t feat_idx,
00112         bool const training)
00113 {
00114     int32_t dim = get_dim();
00115     ASSERT( w.vlen == get_dim() );
00116 
00117     // Shorthand for the number of features of the feature vector
00118     CMatrixFeatures< float64_t >* mf = (CMatrixFeatures< float64_t >*) m_features;
00119     int32_t D = mf->get_num_features();
00120     // Shorthand for the number of states
00121     int32_t S = m_state_model->get_num_states();
00122 
00123     // Distribution of start states
00124     SGVector< float64_t > p = m_state_model->get_start_states();
00125     // Distribution of stop states
00126     SGVector< float64_t > q = m_state_model->get_stop_states();
00127 
00128     // Compute the loss-augmented emission matrix:
00129     // E_{s,i} = w^{em}_s \cdot x_i + Delta(y_i, s)
00130 
00131     SGMatrix< float64_t > x = mf->get_feature_vector(feat_idx);
00132 
00133     int32_t T = x.num_cols;
00134     SGMatrix< float64_t > E(S, T);
00135     E.zero();
00136     index_t em_idx;
00137     m_state_model->reshape_emission_params(m_emission_weights, w, D, m_num_obs);
00138 
00139     for ( int32_t i = 0 ; i < T ; ++i )
00140     {
00141         for ( int32_t j = 0 ; j < D ; ++j )
00142         {
00143             //FIXME make independent of observation values
00144             em_idx = j*m_num_obs + (index_t)CMath::round(x(j,i));
00145 
00146             for ( int32_t s = 0 ; s < S ; ++s )
00147                 E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];
00148         }
00149     }
00150 
00151     // If argmax used while training, add to E the loss matrix
00152     if ( training )
00153     {
00154         CSequence* ytrue =
00155             CSequence::obtain_from_generic(m_labels->get_label(feat_idx));
00156 
00157         REQUIRE(ytrue->get_data().size() == T, "T, the length of the feature "
00158             "x^i (%d) and the length of its corresponding label y^i "
00159             "(%d) must be the same.\n", T, ytrue->get_data().size());
00160 
00161         SGMatrix< float64_t > loss_matrix = m_state_model->loss_matrix(ytrue);
00162 
00163         ASSERT(loss_matrix.num_rows == E.num_rows &&
00164                loss_matrix.num_cols == E.num_cols);
00165 
00166         SGVector< float64_t >::add(E.matrix, 1.0, E.matrix,
00167                 1.0, loss_matrix.matrix, E.num_rows*E.num_cols);
00168 
00169         // Decrement the reference count corresponding to get_label above
00170         SG_UNREF(ytrue);
00171     }
00172 
00173     // Initialize the dynamic programming table and the traceback matrix
00174     SGMatrix< float64_t >  dp(T, S);
00175     SGMatrix< float64_t > trb(T, S);
00176     m_state_model->reshape_transmission_params(m_transmission_weights, w);
00177 
00178     for ( int32_t s = 0 ; s < S ; ++s )
00179     {
00180         if ( p[s] > -CMath::INFTY )
00181         {
00182             // dp(0,s) = E(s,0)
00183             dp(0,s) = E[s];
00184         }
00185         else
00186         {
00187             dp(0,s) = -CMath::INFTY;
00188         }
00189     }
00190 
00191     // Viterbi algorithm
00192     int32_t idx;
00193     float64_t tmp_score, e, a;
00194 
00195     for ( int32_t i = 1 ; i < T ; ++i )
00196     {
00197         for ( int32_t cur = 0 ; cur < S ; ++cur )
00198         {
00199             idx = cur*T + i;
00200 
00201              dp[idx] = -CMath::INFTY;
00202             trb[idx] = -1;
00203 
00204             // e = E(cur,i)
00205             e = E[i*S + cur];
00206 
00207             for ( int32_t prev = 0 ; prev < S ; ++prev )
00208             {
00209                 // aij = m_transmission_weights(prev, cur)
00210                 a = m_transmission_weights[cur*S + prev];
00211 
00212                 if ( a > -CMath::INFTY )
00213                 {
00214                     // tmp_score = e + a + dp(i-1, prev)
00215                     tmp_score = e + a + dp[prev*T + i-1];
00216 
00217                     if ( tmp_score > dp[idx] )
00218                     {
00219                          dp[idx] = tmp_score;
00220                         trb[idx] = prev;
00221                     }
00222                 }
00223             }
00224         }
00225     }
00226 
00227     // Trace back the most likely sequence of states
00228     SGVector< int32_t > opt_path(T);
00229     CResultSet* ret = new CResultSet();
00230     SG_REF(ret);
00231     ret->score = -CMath::INFTY;
00232     opt_path[T-1] = -1;
00233 
00234     for ( int32_t s = 0 ; s < S ; ++s )
00235     {
00236         idx = s*T + T-1;
00237 
00238         if ( q[s] > -CMath::INFTY && dp[idx] > ret->score )
00239         {
00240             ret->score = dp[idx];
00241             opt_path[T-1] = s;
00242         }
00243     }
00244 
00245     for ( int32_t i = T-1 ; i > 0 ; --i )
00246         opt_path[i-1] = trb[opt_path[i]*T + i];
00247 
00248     // Populate the CResultSet object to return
00249     CSequence* ypred = m_state_model->states_to_labels(opt_path);
00250 
00251     ret->psi_pred = get_joint_feature_vector(feat_idx, ypred);
00252     ret->argmax   = ypred;
00253     if ( training )
00254     {
00255         ret->delta     = CStructuredModel::delta_loss(feat_idx, ypred);
00256         ret->psi_truth = CStructuredModel::get_joint_feature_vector(
00257                     feat_idx, feat_idx);
00258         ret->score    -= SGVector< float64_t >::dot(w.vector,
00259                     ret->psi_truth.vector, dim);
00260     }
00261 
00262     return ret;
00263 }
00264 
00265 float64_t CHMSVMModel::delta_loss(CStructuredData* y1, CStructuredData* y2)
00266 {
00267     CSequence* seq1 = CSequence::obtain_from_generic(y1);
00268     CSequence* seq2 = CSequence::obtain_from_generic(y2);
00269 
00270     // Compute the Hamming loss, number of distinct elements in the sequences
00271     return m_state_model->loss(seq1, seq2);
00272 }
00273 
00274 void CHMSVMModel::init_opt(
00275         SGMatrix< float64_t > & A,
00276         SGVector< float64_t > a,
00277         SGMatrix< float64_t > B,
00278         SGVector< float64_t > & b,
00279         SGVector< float64_t > lb,
00280         SGVector< float64_t > ub,
00281         SGMatrix< float64_t > & C)
00282 {
00283     // Shorthand for the number of free states
00284     int32_t S = ((CHMSVMLabels*) m_labels)->get_num_states();
00285     // Shorthand for the number of features of the feature vector
00286     int32_t D = ((CMatrixFeatures< float64_t >*) m_features)->get_num_features();
00287 
00288     // Monotonicity constraints for feature scoring functions
00289     SGVector< int32_t > monotonicity = m_state_model->get_monotonicity(S,D);
00290 
00291     // Quadratic regularizer
00292 
00293     float64_t C_small  =  5.0;
00294     float64_t C_smooth = 10.0;
00295     // TODO change the representation of C to sparse matrix
00296     C = SGMatrix< float64_t >(get_dim()+m_num_aux, get_dim()+m_num_aux);
00297     C.zero();
00298     for ( int32_t i = 0 ; i < get_dim() ; ++i )
00299         C(i,i) = C_small;
00300     for ( int32_t i = get_dim() ; i < get_dim()+m_num_aux ; ++i )
00301         C(i,i) = C_smooth;
00302 
00303     // Smoothness constraints
00304 
00305     // For each auxiliary variable, there are two different constraints
00306     // TODO change the representation of A to sparse matrix
00307     A = SGMatrix< float64_t >(2*m_num_aux, get_dim()+m_num_aux);
00308     A.zero();
00309 
00310     // Indices to the beginning of the blocks of scores. Each block is
00311     // formed by the scores of a pair (state, feature)
00312     SGVector< int32_t > score_starts(S*D);
00313     for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += m_num_obs, ++k )
00314         score_starts[k] = idx;
00315 
00316     // Indices to the beginning of the blocks of variables for smoothness
00317     SGVector< int32_t > aux_starts_smooth(S*D);
00318     for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += m_num_obs-1, ++k )
00319         aux_starts_smooth[k] = idx;
00320 
00321     // Bound the difference between adjacent score values from above and
00322     // below by an auxiliary variable (which then is regularized
00323     // quadratically)
00324 
00325     int32_t con_idx = 0, scr_idx, aux_idx;
00326 
00327     for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
00328     {
00329         scr_idx = score_starts[i];
00330         aux_idx = aux_starts_smooth[i];
00331 
00332         for ( int32_t j = 0 ; j < m_num_obs-1 ; ++j )
00333         {
00334             A(con_idx, scr_idx)   =  1;
00335             A(con_idx, scr_idx+1) = -1;
00336 
00337             if ( monotonicity[i] != 1 )
00338                 A(con_idx, aux_idx) = -1;
00339             ++con_idx;
00340 
00341             A(con_idx, scr_idx)   = -1;
00342             A(con_idx, scr_idx+1) =  1;
00343 
00344             if ( monotonicity[i] != -1 )
00345                 A(con_idx, aux_idx) = -1;
00346             ++con_idx;
00347 
00348             ++scr_idx, ++aux_idx;
00349         }
00350     }
00351 
00352     // Bounds for the smoothness constraints
00353     b = SGVector< float64_t >(2*m_num_aux);
00354     b.zero();
00355 }
00356 
00357 bool CHMSVMModel::check_training_setup() const
00358 {
00359     // Shorthand for the labels in the correct type
00360     CHMSVMLabels* hmsvm_labels = (CHMSVMLabels*) m_labels;
00361     // Frequency of each state
00362     SGVector< int32_t > state_freq( hmsvm_labels->get_num_states() );
00363     state_freq.zero();
00364 
00365     CSequence* seq;
00366     int32_t state;
00367     for ( int32_t i = 0 ; i < hmsvm_labels->get_num_labels() ; ++i )
00368     {
00369         seq = CSequence::obtain_from_generic(hmsvm_labels->get_label(i));
00370 
00371         SGVector<int32_t> seq_data = seq->get_data();
00372         for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
00373         {
00374             state = seq_data[j];
00375 
00376             if ( state < 0 || state >= hmsvm_labels->get_num_states() )
00377             {
00378                 SG_ERROR("Found state out of {0, 1, ..., "
00379                      "num_states-1}\n");
00380                 return false;
00381             }
00382             else
00383             {
00384                 ++state_freq[state];
00385             }
00386         }
00387 
00388         // Decrement the reference count increased by get_label
00389         SG_UNREF(seq);
00390     }
00391 
00392     for ( int32_t i = 0 ; i < hmsvm_labels->get_num_states() ; ++i )
00393     {
00394         if ( state_freq[i] <= 0 )
00395         {
00396             SG_ERROR("What? State %d has never appeared\n", i);
00397             return false;
00398         }
00399     }
00400 
00401     return true;
00402 }
00403 
00404 void CHMSVMModel::init()
00405 {
00406     SG_ADD(&m_num_states, "m_num_states", "The number of states", MS_NOT_AVAILABLE);
00407     SG_ADD((CSGObject**) &m_state_model, "m_state_model", "The state model", MS_NOT_AVAILABLE);
00408     SG_ADD(&m_transmission_weights, "m_transmission_weights",
00409             "Transmission weights used in Viterbi", MS_NOT_AVAILABLE);
00410     SG_ADD(&m_emission_weights, "m_emission_weights",
00411             "Emission weights used in Viterbi", MS_NOT_AVAILABLE);
00412 
00413     m_num_states  = 0;
00414     m_num_obs     = 0;
00415     m_num_aux     = 0;
00416     m_state_model = NULL;
00417 }
00418 
00419 int32_t CHMSVMModel::get_num_aux() const
00420 {
00421     return m_num_aux;
00422 }
00423 
00424 int32_t CHMSVMModel::get_num_aux_con() const
00425 {
00426     return 2*m_num_aux;
00427 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation