en/latest/TwoStateModel_8cpp_source.html

 /*

  * This program is free software; you can redistribute it and/or modify

  * it under the terms of the GNU General Public License as published by

  * the Free Software Foundation; either version 3 of the License, or

  * (at your option) any later version.

  *

  * Written (W) 2012 Fernando José Iglesias García

  * Copyright (C) 2012 Fernando José Iglesias García

  */


 #include <shogun/structure/TwoStateModel.h>

 #include <shogun/mathematics/Math.h>

 #include <shogun/features/MatrixFeatures.h>

 #include <shogun/structure/Plif.h>


 using namespace shogun;


 /**
  * Default constructor: sets the number of states and transmission
  * parameters, builds the state loss matrix and initializes the start (m_p)
  * and stop (m_q) vectors.
  */
 CTwoStateModel::CTwoStateModel() : CStateModel()
 {
     // Four states are handled in total. Parameters are learnt only for two
     // of them; the remaining two are the start and stop states.
     m_num_states = 4;
     m_num_transmission_params = 4;

     // State loss: zero everywhere except between the last state and each of
     // the other states, where it is one in both directions.
     m_state_loss_mat = SGMatrix< float64_t >(m_num_states, m_num_states);
     m_state_loss_mat.zero();

     const int32_t last_state = m_num_states - 1;
     for ( int32_t other = 0 ; other < last_state ; ++other )
     {
         m_state_loss_mat(other, last_state) = 1;
         m_state_loss_mat(last_state, other) = 1;
     }

     // m_p is -infinity everywhere except at the start state (index 0)
     m_p = SGVector< float64_t >(m_num_states);
     m_p.set_const(-CMath::INFTY);
     m_p[0] = 0;

     // m_q is -infinity everywhere except at the stop state (index 1)
     m_q = SGVector< float64_t >(m_num_states);
     m_q.set_const(-CMath::INFTY);
     m_q[1] = 0;
 }


 /** Destructor; this class performs no explicit cleanup here. */
 CTwoStateModel::~CTwoStateModel() { }


 /**
  * Builds the loss matrix associated with a label sequence.
  *
  * @param label_seq label sequence, converted to states with labels_to_states
  * @return matrix with m_num_states rows and one column per sequence
  * position; entry (s, i) is the state loss between state s and the state of
  * the sequence at position i
  */
 SGMatrix< float64_t > CTwoStateModel::loss_matrix(CSequence* label_seq)
 {
     SGVector< int32_t > true_states = labels_to_states(label_seq);
     SGMatrix< float64_t > result(m_num_states, true_states.vlen);

     for ( int32_t pos = 0 ; pos < result.num_cols ; ++pos )
     {
         // State of the given sequence at this position
         const int32_t truth = true_states[pos];

         for ( int32_t state = 0 ; state < result.num_rows ; ++state )
             result(state, pos) = m_state_loss_mat(state, truth);
     }

     return result;
 }


 /**
  * Computes the loss between two label sequences as the sum, over all
  * positions, of the state loss between the corresponding states.
  *
  * @param label_seq_lhs first label sequence
  * @param label_seq_rhs second label sequence; its state sequence must have
  * the same length as the first one (checked with ASSERT)
  * @return accumulated state loss
  */
 float64_t CTwoStateModel::loss(CSequence* label_seq_lhs, CSequence* label_seq_rhs)
 {
     SGVector< int32_t > lhs_states = labels_to_states(label_seq_lhs);
     SGVector< int32_t > rhs_states = labels_to_states(label_seq_rhs);

     ASSERT(lhs_states.vlen == rhs_states.vlen)

     float64_t total = 0.0;
     int32_t pos = 0;
     while ( pos < lhs_states.vlen )
     {
         total += m_state_loss_mat(lhs_states[pos], rhs_states[pos]);
         ++pos;
     }

     return total;
 }


 /**
  * Converts a sequence of labels into a sequence of state indices.
  *
  * State indices:
  *   0 -> start state
  *   1 -> stop state
  *   2 -> negative state (label == 0)
  *   3 -> positive state (label == 1)
  *
  * The first and last positions of the result are always the start and the
  * stop state respectively, regardless of the labels found there. For a
  * sequence of length one the single element ends up being the stop state.
  *
  * @param label_seq label sequence to convert
  * @return state sequence of the same length as the label sequence; empty if
  * the label sequence is empty
  */
 SGVector< int32_t > CTwoStateModel::labels_to_states(CSequence* label_seq) const
 {
     SGVector< int32_t > seq_data = label_seq->get_data();
     SGVector< int32_t > state_seq(seq_data.size());

     // With no elements there is nothing to convert, and the boundary
     // assignments below would write outside of the vector
     if ( state_seq.vlen == 0 )
         return state_seq;

     for ( int32_t i = 1 ; i < state_seq.vlen-1 ; ++i )
     {
         //FIXME make independent of values 0-1 in labels
         state_seq[i] = seq_data[i] + 2;
     }

     // The first element is always start state
     state_seq[0] = 0;
     // The last element is always stop state
     state_seq[state_seq.vlen-1] = 1;

     return state_seq;
 }


 /**
  * Converts a sequence of state indices into a sequence of labels.
  *
  * Mapping from state index to label:
  *   0 (start state)    -> label 0
  *   1 (stop state)     -> label 0
  *   2 (negative state) -> label 0
  *   3 (positive state) -> label 1
  *
  * @param state_seq state sequence to convert
  * @return newly allocated CSequence on which SG_REF has already been called
  */
 CSequence* CTwoStateModel::states_to_labels(SGVector< int32_t > state_seq) const
 {
     SGVector< int32_t > labels(state_seq.vlen);

     //FIXME make independent of values 0-1 in labels
     // Only the positive state (index 3) yields label 1, the rest yield 0
     for ( int32_t pos = 0 ; pos < state_seq.vlen ; ++pos )
         labels[pos] = ( state_seq[pos] == 3 ) ? 1 : 0;

     CSequence* ret = new CSequence(labels);
     SG_REF(ret);
     return ret;
 }


 /**
  * Copies the emission parameters stored in the parameter vector w into
  * emission_weights.
  *
  * emission_weights is zeroed first. Only states 2 (negative) and 3
  * (positive) receive values since the start (0) and stop (1) states have no
  * emission scores. The entry for state s, feature f and observation o is
  * written at index s*num_feats*num_obs + f*num_obs + o.
  *
  * @param emission_weights output vector, overwritten
  * @param w parameter vector; the emission parameters are read consecutively
  * starting at index m_num_transmission_params
  * @param num_feats number of features
  * @param num_obs number of observations per feature
  */
 void CTwoStateModel::reshape_emission_params(SGVector< float64_t >& emission_weights,
         SGVector< float64_t > w, int32_t num_feats, int32_t num_obs)
 {
     emission_weights.zero();

     // Read position in w, right after the transmission parameters
     index_t next_w = m_num_transmission_params;

     for ( int32_t state = 2 ; state < m_num_states ; ++state )
     {
         for ( int32_t feat = 0 ; feat < num_feats ; ++feat )
         {
             // Offset of the first observation of this (state, feature) pair
             const index_t base = state*num_feats*num_obs + feat*num_obs;

             for ( int32_t obs = 0 ; obs < num_obs ; ++obs )
                 emission_weights[base + obs] = w[next_w++];
         }
     }
 }


 /**
  * Sets the penalties of the PLiFs in plif_matrix from the emission
  * parameters stored in the parameter vector w.
  *
  * Only states 2 and 3 are processed. For each (state, feature) pair,
  * num_plif_nodes consecutive values of w are handed to the PLiF found at
  * index m_num_states*feature + state of plif_matrix.
  *
  * @param plif_matrix array holding the PLiFs to update
  * @param w parameter vector; the emission parameters are read consecutively
  * starting at index m_num_transmission_params
  * @param num_feats number of features
  * @param num_plif_nodes number of penalties per PLiF
  */
 void CTwoStateModel::reshape_emission_params(CDynamicObjectArray* plif_matrix,
         SGVector< float64_t > w, int32_t num_feats, int32_t num_plif_nodes)
 {
     // Read position in w, right after the transmission parameters
     index_t next_w = m_num_transmission_params;

     for ( int32_t state = 2 ; state < m_num_states ; ++state )
     {
         for ( int32_t feat = 0 ; feat < num_feats ; ++feat )
         {
             SGVector< float64_t > penalties(num_plif_nodes);
             for ( int32_t node = 0 ; node < num_plif_nodes ; ++node )
                 penalties[node] = w[next_w++];

             // The element obtained from the array is released with SG_UNREF
             // once its penalties have been set
             CPlif* plif = (CPlif*) plif_matrix->get_element(m_num_states*feat + state);
             plif->set_plif_penalty(penalties);
             SG_UNREF(plif);
         }
     }
 }


 /**
  * Fills the transmission matrix from the first four elements of the
  * parameter vector w.
  *
  * Every entry is first set to -infinity. Entries from the start state and
  * to the stop state listed below are set to zero, and the four entries
  * among the negative and positive states take their values from w:
  *   w[0]: negative -> negative
  *   w[1]: negative -> positive
  *   w[2]: positive -> positive
  *   w[3]: positive -> negative
  *
  * @param transmission_weights output matrix indexed (from, to), overwritten
  * @param w parameter vector
  */
 void CTwoStateModel::reshape_transmission_params(
         SGMatrix< float64_t >& transmission_weights, SGVector< float64_t > w)
 {
     // State indices
     const int32_t start_state    = 0;
     const int32_t stop_state     = 1;
     const int32_t negative_state = 2;
     const int32_t positive_state = 3;

     transmission_weights.set_const(-CMath::INFTY);

     // Leaving the start state
     transmission_weights(start_state, negative_state) = 0;
     transmission_weights(start_state, positive_state) = 0;

     // Leaving the negative state
     transmission_weights(negative_state, stop_state)     = 0;
     transmission_weights(negative_state, negative_state) = w[0];
     transmission_weights(negative_state, positive_state) = w[1];

     // Leaving the positive state
     transmission_weights(positive_state, stop_state)     = 0;
     transmission_weights(positive_state, negative_state) = w[3];
     transmission_weights(positive_state, positive_state) = w[2];
 }


 /**
  * Writes transmission and emission weights into the vector psi.
  *
  * The first four elements of psi are the transmission weights
  * negative->negative, negative->positive, positive->positive and
  * positive->negative, in this order. They are followed by the emission
  * weights of states 2 (negative) and 3 (positive); the start (0) and stop
  * (1) states have no emission scores. The emission weight for state s,
  * feature f and observation o is read from index
  * s*num_feats*num_obs + f*num_obs + o of emission_weights.
  *
  * @param psi output vector, written starting at index 0
  * @param transmission_weights transmission matrix indexed (from, to)
  * @param emission_weights emission weights
  * @param num_feats number of features
  * @param num_obs number of observations per feature
  */
 void CTwoStateModel::weights_to_vector(SGVector< float64_t >& psi,
         SGMatrix< float64_t > transmission_weights,
         SGVector< float64_t > emission_weights,
         int32_t num_feats, int32_t num_obs) const
 {
     // State indices
     const int32_t negative_state = 2;
     const int32_t positive_state = 3;

     psi[0] = transmission_weights(negative_state, negative_state);
     psi[1] = transmission_weights(negative_state, positive_state);
     psi[2] = transmission_weights(positive_state, positive_state);
     psi[3] = transmission_weights(positive_state, negative_state);

     // Write position in psi, right after the transmission parameters
     index_t next_psi = m_num_transmission_params;

     for ( int32_t state = 2 ; state < m_num_states ; ++state )
     {
         for ( int32_t feat = 0 ; feat < num_feats ; ++feat )
         {
             // Offset of the first observation of this (state, feature) pair
             const index_t base = state*num_feats*num_obs + feat*num_obs;

             for ( int32_t obs = 0 ; obs < num_obs ; ++obs )
                 psi[next_psi++] = emission_weights[base + obs];
         }
     }
 }


 /**
  * Allocates a zero-initialized vector of length
  * 2*(2 + num_feats*num_obs) and fills it by delegating to the overload of
  * weights_to_vector that takes the output vector as first argument.
  *
  * @param transmission_weights transmission matrix
  * @param emission_weights emission weights
  * @param num_feats number of features
  * @param num_obs number of observations per feature
  * @return the filled vector
  */
 SGVector< float64_t > CTwoStateModel::weights_to_vector(SGMatrix< float64_t > transmission_weights,
         SGVector< float64_t > emission_weights, int32_t num_feats, int32_t num_obs) const
 {
     const int32_t num_free_states = 2;
     const int32_t params_per_state = num_free_states + num_feats*num_obs;

     SGVector< float64_t > psi(num_free_states*params_per_state);
     psi.zero();

     weights_to_vector(psi, transmission_weights, emission_weights, num_feats, num_obs);

     return psi;
 }


 /**
  * Returns a vector of num_feats*num_free_states elements whose first
  * num_feats entries are -1 and whose following num_feats entries are +1.
  *
  * @param num_free_states number of free states, required to be two
  * @param num_feats number of features
  * @return the monotonicity vector
  */
 SGVector< int32_t > CTwoStateModel::get_monotonicity(int32_t num_free_states,
         int32_t num_feats) const
 {
     REQUIRE(num_free_states == 2, "Using the TwoStateModel only two states are free\n")

     SGVector< int32_t > monotonicity(num_feats*num_free_states);

     // Fill the two halves of the vector in a single pass
     for ( int32_t feat = 0 ; feat < num_feats ; ++feat )
     {
         monotonicity[feat]             = -1;
         monotonicity[num_feats + feat] = +1;
     }

     return monotonicity;
 }


 /**
  * Generates random two-state data and wraps it in a CHMSVMModel.
  *
  * Labels: every example is a sequence of exm_len labels, all zero except
  * for a random number of blocks of ones placed at random positions.
  * Features: for each feature, a copy of the concatenated label sequence is
  * distorted by overwriting a proportion of its elements with the values
  * found at other randomly chosen positions, and Gaussian noise is added.
  * The first num_noise_features features are then replaced by pure noise.
  *
  * @param num_exm number of examples
  * @param exm_len length of every example
  * @param num_features number of features
  * @param num_noise_features number of features replaced by pure noise, must
  * not exceed num_features
  * @return newly allocated CHMSVMModel built with the generated features and
  * labels, the SMT_TWO_STATE state model type and PLiFs enabled
  */
 CHMSVMModel* CTwoStateModel::simulate_data(int32_t num_exm, int32_t exm_len,
     int32_t num_features, int32_t num_noise_features)
 {
     // The noise substitution below overwrites the storage of the first
     // num_noise_features features; asking for more than num_features would
     // write past the end of the signal matrix
     REQUIRE(num_noise_features <= num_features, "The number of noise features "
             "cannot be larger than the number of features\n")

     // Number of different states
     int32_t num_states = 2;
     // Min and max length of positive block
     int32_t block_len[] = {10, 100};
     // Min and max number of positive blocks per example
     int32_t num_blocks[] = {0, 3};

     // Proportion of wrong labels
     float64_t prop_distort = 0.2;
     // Standard deviation of Gaussian noise
     float64_t noise_std = 4;

     // Generate label sequence randomly containing from num_blocks[0] to
     // num_blocks[1] blocks of positive labels each of length between
     // block_len[0] and block_len[1]

     CSequenceLabels* labels = new CSequenceLabels(num_exm, num_states);
     SGVector< int32_t > ll(num_exm*exm_len);
     ll.zero();
     int32_t rnb, rl, rp;

     for ( int32_t i = 0 ; i < num_exm ; ++i)
     {
         SGVector< int32_t > lab(exm_len);
         lab.zero();
         rnb = num_blocks[0] + CMath::ceil((num_blocks[1]-num_blocks[0])*
             CMath::random(0.0, 1.0)) - 1;

         for ( int32_t j = 0 ; j < rnb ; ++j )
         {
             rl = block_len[0] + CMath::ceil((block_len[1]-block_len[0])*
                 CMath::random(0.0, 1.0)) - 1;
             // The loop below writes rl+1 elements, so a block can only fit
             // in the example if rl is at most exm_len-1
             if ( rl > exm_len-1 )
                 rl = exm_len-1;

             rp = CMath::ceil((exm_len-rl)*CMath::random(0.0, 1.0));
             // rp is used as a one-based start position. A random draw of
             // exactly zero would give rp == 0 and hence index -1 below
             if ( rp < 1 )
                 rp = 1;

             for ( int32_t idx = rp-1 ; idx < rp+rl ; ++idx )
             {
                 lab[idx] = 1;
                 ll[i*exm_len + idx] = 1;
             }
         }

         labels->add_vector_label(lab);
     }

     // Generate features by
     // i) introducing label noise, i.e. flipping a proportion prop_distort
     // of labels and
     // ii) adding Gaussian noise to the (distorted) label sequence

     SGVector< int32_t >   distort(num_exm*exm_len);
     SGVector< int32_t >   d1(CMath::round(distort.vlen*prop_distort));
     SGVector< int32_t >   d2(d1.vlen);
     SGVector< int32_t >   lf;
     SGMatrix< float64_t > signal(num_features, distort.vlen);

     distort.range_fill();
     for ( int32_t i = 0 ; i < num_features ; ++i )
     {
         // NOTE(review): if SGVector assignment shares the underlying buffer
         // instead of copying it (verify), the writes to lf below also
         // modify ll and the distortion accumulates from feature to feature
         lf = ll;
         CMath::permute(distort);

         // d1 takes the first and d2 the last d1.vlen permuted positions
         for ( int32_t j = 0 ; j < d1.vlen ; ++j )
             d1[j] = distort[j];

         for ( int32_t j = 0 ; j < d2.vlen ; ++j )
             d2[j] = distort[ distort.vlen-d2.vlen+j ];

         // Overwrite the labels at positions d1 with the ones at positions d2
         for ( int32_t j = 0 ; j < d1.vlen ; ++j )
             lf[ d1[j] ] = lf[ d2[j] ];

         // NOTE(review): signal[] is a linear index. The range
         // [i*num_cols, (i+1)*num_cols) written here only corresponds to
         // row i of the matrix if the storage is row-major -- confirm
         // against the layout used by SGMatrix
         int32_t idx = i*signal.num_cols;
         for ( int32_t j = 0 ; j < signal.num_cols ; ++j )
             signal[idx++] = lf[j] + noise_std*CMath::normal_random((float64_t)0.0, 1.0);
     }

     // Substitute some features by pure noise
     for ( int32_t i = 0 ; i < num_noise_features ; ++i )
     {
         int32_t idx = i*signal.num_cols;
         for ( int32_t j = 0 ; j < signal.num_cols ; ++j )
             signal[idx++] = noise_std*CMath::normal_random((float64_t)0.0, 1.0);
     }

     CMatrixFeatures< float64_t >* features =
         new CMatrixFeatures< float64_t >(signal, exm_len, num_exm);

     int32_t num_obs = 0; // continuous observations, dummy value
     bool use_plifs = true;
     return new CHMSVMModel(features, labels, SMT_TWO_STATE, num_obs, use_plifs);
 }

shogun::CStateModel::m_state_loss_mat
SGMatrix< float64_t > m_state_loss_mat
Definition: StateModel.h:225

shogun::SGVector::range_fill
void range_fill(T start=0)
Definition: SGVector.cpp:171

shogun::CMath::permute
static void permute(SGVector< T > v, CRandom *rand=NULL)
Definition: Math.h:1144

shogun::CTwoStateModel::~CTwoStateModel
virtual ~CTwoStateModel()
Definition: TwoStateModel.cpp:43

shogun::CPlif::set_plif_penalty
void set_plif_penalty(SGVector< float64_t > p_penalties)
Definition: Plif.h:272

Math.h

shogun::CTwoStateModel::reshape_emission_params
virtual void reshape_emission_params(SGVector< float64_t > &emission_weights, SGVector< float64_t > w, int32_t num_feats, int32_t num_obs)
Definition: TwoStateModel.cpp:120

index_t
int32_t index_t
Definition: common.h:62

shogun::CMath::ceil
static float64_t ceil(float64_t d)
Definition: Math.h:416

shogun::CMath::INFTY
static const float64_t INFTY
infinity
Definition: Math.h:2048

shogun::CMath::normal_random
static float32_t normal_random(float32_t mean, float32_t std_dev)
Definition: Math.h:1095

shogun::CHMSVMModel
Class CHMSVMModel that represents the application specific model and contains the application depende...
Definition: HMSVMModel.h:31

shogun::CStateModel::m_q
SGVector< float64_t > m_q
Definition: StateModel.h:231

REQUIRE
#define REQUIRE(x,...)
Definition: SGIO.h:206

shogun::SGMatrix::num_cols
index_t num_cols
Definition: SGMatrix.h:376

shogun::CTwoStateModel::loss_matrix
virtual SGMatrix< float64_t > loss_matrix(CSequence *label_seq)
Definition: TwoStateModel.cpp:47

shogun::CSequenceLabels
Class CSequenceLabels used e.g. in the application of Structured Output (SO) learning to Hidden Marko...
Definition: SequenceLabels.h:73

shogun::CStateModel
class CStateModel base, abstract class for the internal state representation used in the CHMSVMModel...
Definition: StateModel.h:30

shogun::CTwoStateModel::states_to_labels
virtual CSequence * states_to_labels(SGVector< int32_t > state_seq) const
Definition: TwoStateModel.cpp:98

shogun::CStateModel::m_p
SGVector< float64_t > m_p
Definition: StateModel.h:228

SG_REF
#define SG_REF(x)
Definition: SGObject.h:54

shogun::SGMatrix::zero
void zero()
Definition: SGMatrix.cpp:136

shogun::CPlif
class Plif
Definition: Plif.h:40

shogun::SGMatrix::num_rows
index_t num_rows
Definition: SGMatrix.h:374

shogun::SGMatrix< float64_t >

shogun::CSequence
Class CSequence to be used in the application of Structured Output (SO) learning to Hidden Markov Sup...
Definition: SequenceLabels.h:28

shogun::SMT_TWO_STATE
Definition: StateModelTypes.h:23

shogun::CMath::random
static uint64_t random()
Definition: Math.h:1019

shogun::CTwoStateModel::loss
virtual float64_t loss(CSequence *label_seq_lhs, CSequence *label_seq_rhs)
Definition: TwoStateModel.cpp:61

shogun::CTwoStateModel::weights_to_vector
virtual void weights_to_vector(SGVector< float64_t > &psi, SGMatrix< float64_t > transmission_weights, SGVector< float64_t > emission_weights, int32_t num_feats, int32_t num_obs) const
Definition: TwoStateModel.cpp:193

shogun::SGVector::size
int32_t size() const
Definition: SGVector.h:113

shogun::CTwoStateModel::labels_to_states
virtual SGVector< int32_t > labels_to_states(CSequence *label_seq) const
Definition: TwoStateModel.cpp:75

MatrixFeatures.h

shogun::SGVector::vlen
index_t vlen
Definition: SGVector.h:494

shogun::SGVector::zero
void zero()
Definition: SGVector.cpp:136

ASSERT
#define ASSERT(x)
Definition: SGIO.h:201

shogun::SGVector< float64_t >

float64_t
double float64_t
Definition: common.h:50

shogun::CSequenceLabels::add_vector_label
void add_vector_label(SGVector< int32_t > label)
Definition: SequenceLabels.cpp:46

shogun::CDynamicObjectArray
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
Definition: DynamicObjectArray.h:31

Plif.h

shogun::CStateModel::m_num_transmission_params
int32_t m_num_transmission_params
Definition: StateModel.h:222

SG_UNREF
#define SG_UNREF(x)
Definition: SGObject.h:55

shogun
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18

shogun::CTwoStateModel::reshape_transmission_params
virtual void reshape_transmission_params(SGMatrix< float64_t > &transmission_weights, SGVector< float64_t > w)
Definition: TwoStateModel.cpp:169

shogun::CMatrixFeatures
Class CMatrixFeatures used to represent data whose feature vectors are better represented with matric...
Definition: MatrixFeatures.h:31

shogun::CTwoStateModel::simulate_data
static CHMSVMModel * simulate_data(int32_t num_exm, int32_t exm_len, int32_t num_features, int32_t num_noise_features)
Definition: TwoStateModel.cpp:249

shogun::CSequence::get_data
SGVector< int32_t > get_data() const
Definition: SequenceLabels.h:61

shogun::CDynamicObjectArray::get_element
CSGObject * get_element(int32_t index) const
Definition: DynamicObjectArray.h:194

shogun::CTwoStateModel::get_monotonicity
virtual SGVector< int32_t > get_monotonicity(int32_t num_free_states, int32_t num_feats) const
Definition: TwoStateModel.cpp:234

shogun::CMath::round
static float64_t round(float64_t d)
Definition: Math.h:398

shogun::CTwoStateModel::CTwoStateModel
CTwoStateModel()
Definition: TwoStateModel.cpp:18

shogun::SGMatrix::set_const
void set_const(T const_elem)
Definition: SGMatrix.cpp:129

shogun::SGVector::set_const
void set_const(T const_elem)
Definition: SGVector.cpp:150

shogun::CStateModel::m_num_states
int32_t m_num_states
Definition: StateModel.h:219

TwoStateModel.h