SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
HMM.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #ifndef __CHMM_H__
13 #define __CHMM_H__
14 
16 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
18 #include <shogun/lib/config.h>
22 
23 #ifdef USE_HMMPARALLEL
24 #define USE_HMMPARALLEL_STRUCTURES 1
25 #endif
26 
27 namespace shogun
28 {
29  class CFeatures;
30  template <class ST> class CStringFeatures;
33 
36 
37 #ifndef DOXYGEN_SHOULD_SKIP_THIS
38 
39 struct T_ALPHA_BETA
40 {
42  int32_t dimension;
43 
45  T_ALPHA_BETA_TABLE* table;
46 
48  bool updated;
49 
51  float64_t sum;
52 };
53 #endif // DOXYGEN_SHOULD_SKIP_THIS
54 
59 #ifdef USE_BIGSTATES
60 typedef uint16_t T_STATES ;
61 #else
62 typedef uint8_t T_STATES ;
63 #endif
64 typedef T_STATES* P_STATES ;
65 
67 
70 {
81 };
82 
83 
85 class Model
86 {
87  public:
89  Model();
90 
92  virtual ~Model();
93 
95  inline void sort_learn_a()
96  {
97  CMath::sort(learn_a,2) ;
98  }
99 
101  inline void sort_learn_b()
102  {
103  CMath::sort(learn_b,2) ;
104  }
105 
110 
111  inline int32_t get_learn_a(int32_t line, int32_t column) const
112  {
113  return learn_a[line*2 + column];
114  }
115 
117  inline int32_t get_learn_b(int32_t line, int32_t column) const
118  {
119  return learn_b[line*2 + column];
120  }
121 
123  inline int32_t get_learn_p(int32_t offset) const
124  {
125  return learn_p[offset];
126  }
127 
129  inline int32_t get_learn_q(int32_t offset) const
130  {
131  return learn_q[offset];
132  }
133 
135  inline int32_t get_const_a(int32_t line, int32_t column) const
136  {
137  return const_a[line*2 + column];
138  }
139 
141  inline int32_t get_const_b(int32_t line, int32_t column) const
142  {
143  return const_b[line*2 + column];
144  }
145 
147  inline int32_t get_const_p(int32_t offset) const
148  {
149  return const_p[offset];
150  }
151 
153  inline int32_t get_const_q(int32_t offset) const
154  {
155  return const_q[offset];
156  }
157 
159  inline float64_t get_const_a_val(int32_t line) const
160  {
161  return const_a_val[line];
162  }
163 
165  inline float64_t get_const_b_val(int32_t line) const
166  {
167  return const_b_val[line];
168  }
169 
171  inline float64_t get_const_p_val(int32_t offset) const
172  {
173  return const_p_val[offset];
174  }
175 
177  inline float64_t get_const_q_val(int32_t offset) const
178  {
179  return const_q_val[offset];
180  }
181 #ifdef FIX_POS
182 
183  inline char get_fix_pos_state(int32_t pos, T_STATES state, T_STATES num_states)
184  {
185 #ifdef HMM_DEBUG
186  if ((pos<0)||(pos*num_states+state>65336))
187  SG_DEBUG("index out of range in get_fix_pos_state(%i,%i,%i) \n", pos,state,num_states)
188 #endif
189  return fix_pos_state[pos*num_states+state] ;
190  }
191 #endif
192 
193 
198 
199  inline void set_learn_a(int32_t offset, int32_t value)
200  {
201  learn_a[offset]=value;
202  }
203 
205  inline void set_learn_b(int32_t offset, int32_t value)
206  {
207  learn_b[offset]=value;
208  }
209 
211  inline void set_learn_p(int32_t offset, int32_t value)
212  {
213  learn_p[offset]=value;
214  }
215 
217  inline void set_learn_q(int32_t offset, int32_t value)
218  {
219  learn_q[offset]=value;
220  }
221 
223  inline void set_const_a(int32_t offset, int32_t value)
224  {
225  const_a[offset]=value;
226  }
227 
229  inline void set_const_b(int32_t offset, int32_t value)
230  {
231  const_b[offset]=value;
232  }
233 
235  inline void set_const_p(int32_t offset, int32_t value)
236  {
237  const_p[offset]=value;
238  }
239 
241  inline void set_const_q(int32_t offset, int32_t value)
242  {
243  const_q[offset]=value;
244  }
245 
247  inline void set_const_a_val(int32_t offset, float64_t value)
248  {
249  const_a_val[offset]=value;
250  }
251 
253  inline void set_const_b_val(int32_t offset, float64_t value)
254  {
255  const_b_val[offset]=value;
256  }
257 
259  inline void set_const_p_val(int32_t offset, float64_t value)
260  {
261  const_p_val[offset]=value;
262  }
263 
265  inline void set_const_q_val(int32_t offset, float64_t value)
266  {
267  const_q_val[offset]=value;
268  }
269 #ifdef FIX_POS
270 
271  inline void set_fix_pos_state(
272  int32_t pos, T_STATES state, T_STATES num_states, char value)
273  {
274 #ifdef HMM_DEBUG
275  if ((pos<0)||(pos*num_states+state>65336))
276  SG_DEBUG("index out of range in set_fix_pos_state(%i,%i,%i,%i) [%i]\n", pos,state,num_states,(int)value, pos*num_states+state)
277 #endif
278  fix_pos_state[pos*num_states+state]=value;
279  if (value==FIX_ALLOWED)
280  for (int32_t i=0; i<num_states; i++)
281  if (get_fix_pos_state(pos,i,num_states)==FIX_DEFAULT)
282  set_fix_pos_state(pos,i,num_states,FIX_DISALLOWED) ;
283  }
285 
287  const static char FIX_DISALLOWED ;
288 
290  const static char FIX_ALLOWED ;
291 
293  const static char FIX_DEFAULT ;
294 
296  const static float64_t DISALLOWED_PENALTY ;
297 #endif
298  protected:
305 
306  int32_t* learn_a;
307 
309  int32_t* learn_b;
310 
312  int32_t* learn_p;
313 
315  int32_t* learn_q;
317 
324 
325  int32_t* const_a;
326 
328  int32_t* const_b;
329 
331  int32_t* const_p;
332 
334  int32_t* const_q;
335 
336 
339 
342 
345 
348 
349 #ifdef FIX_POS
350 
353  char* fix_pos_state;
354 #endif
355 
356 };
357 
358 
369 class CHMM : public CDistribution
370 {
371  private:
372 
373  T_STATES trans_list_len ;
374  T_STATES **trans_list_forward ;
375  T_STATES *trans_list_forward_cnt ;
376  float64_t **trans_list_forward_val ;
377  T_STATES **trans_list_backward ;
378  T_STATES *trans_list_backward_cnt ;
379  bool mem_initialized ;
380 
381 #ifdef USE_HMMPARALLEL_STRUCTURES
382 
384  struct S_DIM_THREAD_PARAM
385  {
386  CHMM* hmm;
387  int32_t dim;
388  float64_t prob_sum;
389  };
390 
392  struct S_BW_THREAD_PARAM
393  {
394  CHMM* hmm;
395  int32_t dim_start;
396  int32_t dim_stop;
397 
398  float64_t ret;
399 
400  float64_t* p_buf;
401  float64_t* q_buf;
402  float64_t* a_buf;
403  float64_t* b_buf;
404  };
405 
406  inline T_ALPHA_BETA & ALPHA_CACHE(int32_t dim) {
407  return alpha_cache[dim%parallel->get_num_threads()] ; } ;
408  inline T_ALPHA_BETA & BETA_CACHE(int32_t dim) {
409  return beta_cache[dim%parallel->get_num_threads()] ; } ;
410 #ifdef USE_LOGSUMARRAY
411  inline float64_t* ARRAYS(int32_t dim) {
412  return arrayS[dim%parallel->get_num_threads()] ; } ;
413 #endif
414  inline float64_t* ARRAYN1(int32_t dim) {
415  return arrayN1[dim%parallel->get_num_threads()] ; } ;
416  inline float64_t* ARRAYN2(int32_t dim) {
417  return arrayN2[dim%parallel->get_num_threads()] ; } ;
418  inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) {
420  inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) const {
422  inline T_STATES* PATH(int32_t dim) {
423  return path[dim%parallel->get_num_threads()] ; } ;
424  inline bool & PATH_PROB_UPDATED(int32_t dim) {
425  return path_prob_updated[dim%parallel->get_num_threads()] ; } ;
426  inline int32_t & PATH_PROB_DIMENSION(int32_t dim) {
427  return path_prob_dimension[dim%parallel->get_num_threads()] ; } ;
428 #else
429  inline T_ALPHA_BETA & ALPHA_CACHE(int32_t /*dim*/) {
430  return alpha_cache ; } ;
431  inline T_ALPHA_BETA & BETA_CACHE(int32_t /*dim*/) {
432  return beta_cache ; } ;
433 #ifdef USE_LOGSUMARRAY
434  inline float64_t* ARRAYS(int32_t dim) {
435  return arrayS ; } ;
436 #endif
437  inline float64_t* ARRAYN1(int32_t /*dim*/) {
438  return arrayN1 ; } ;
439  inline float64_t* ARRAYN2(int32_t /*dim*/) {
440  return arrayN2 ; } ;
441  inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t /*dim*/) {
442  return states_per_observation_psi ; } ;
443  inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t /*dim*/) const {
444  return states_per_observation_psi ; } ;
445  inline T_STATES* PATH(int32_t /*dim*/) {
446  return path ; } ;
447  inline bool & PATH_PROB_UPDATED(int32_t /*dim*/) {
448  return path_prob_updated ; } ;
449  inline int32_t & PATH_PROB_DIMENSION(int32_t /*dim*/) {
450  return path_prob_dimension ; } ;
451 #endif
452 
457  bool converged(float64_t x, float64_t y);
458 
464  public:
466  CHMM();
467 
478  CHMM(
479  int32_t N, int32_t M, Model* model, float64_t PSEUDO);
480  CHMM(
481  CStringFeatures<uint16_t>* obs, int32_t N, int32_t M,
482  float64_t PSEUDO);
483  CHMM(
484  int32_t N, float64_t* p, float64_t* q, float64_t* a);
485  CHMM(
486  int32_t N, float64_t* p, float64_t* q, int32_t num_trans,
487  float64_t* a_trans);
488 
493  CHMM(FILE* model_file, float64_t PSEUDO);
494 
496  CHMM(CHMM* h);
497 
499  virtual ~CHMM();
500 
509  virtual bool train(CFeatures* data=NULL);
510  virtual int32_t get_num_model_parameters() { return N*(N+M+2); }
511  virtual float64_t get_log_model_parameter(int32_t num_param);
512  virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example);
513  virtual float64_t get_log_likelihood_example(int32_t num_example)
514  {
515  return model_probability(num_example);
516  }
517 
523  bool initialize(Model* model, float64_t PSEUDO, FILE* model_file=NULL);
525 
528 
531 
543  float64_t forward_comp(int32_t time, int32_t state, int32_t dimension);
545  int32_t time, int32_t state, int32_t dimension);
546 
554  float64_t backward_comp(int32_t time, int32_t state, int32_t dimension);
556  int32_t time, int32_t state, int32_t dimension);
557 
562  float64_t best_path(int32_t dimension);
563  inline uint16_t get_best_path_state(int32_t dim, int32_t t)
564  {
565  ASSERT(PATH(dim))
566  return PATH(dim)[t];
567  }
568 
572 
574  inline float64_t model_probability(int32_t dimension=-1)
575  {
576  //for faster calculation cache model probability
577  if (dimension==-1)
578  {
579  if (mod_prob_updated)
581  else
583  }
584  else
585  return forward(p_observations->get_vector_length(dimension), 0, dimension);
586  }
587 
593  inline float64_t linear_model_probability(int32_t dimension)
594  {
595  float64_t lik=0;
596  int32_t len=0;
597  bool free_vec;
598  uint16_t* o=p_observations->get_feature_vector(dimension, len, free_vec);
600 
601  ASSERT(N==len)
602 
603  for (int32_t i=0; i<N; i++)
604  {
605  lik+=obs_b[*o++];
606  obs_b+=M;
607  }
608  p_observations->free_feature_vector(o, dimension, free_vec);
609  return lik;
610 
611  // sorry, the above code is the speed optimized version of :
612  /* float64_t lik=0;
613 
614  for (int32_t i=0; i<N; i++)
615  lik+=get_b(i, p_observations->get_feature(dimension, i));
616  return lik;
617  */
618  // : that
619  }
620 
622 
625  inline bool set_iterations(int32_t num) { iterations=num; return true; }
626  inline int32_t get_iterations() { return iterations; }
627  inline bool set_epsilon (float64_t eps) { epsilon=eps; return true; }
628  inline float64_t get_epsilon() { return epsilon; }
629 
634 
641  void estimate_model_baum_welch(CHMM* train);
643 
644 #ifdef USE_HMMPARALLEL_STRUCTURES
645  void ab_buf_comp(
646  float64_t* p_buf, float64_t* q_buf, float64_t* a_buf,
647  float64_t* b_buf, int32_t dim) ;
648 #else
649  void estimate_model_baum_welch_old(CHMM* train);
650 #endif
651 
656 
660  void estimate_model_viterbi(CHMM* train);
661 
666 
668 
670  bool linear_train(bool right_align=false);
671 
673  bool permutation_entropy(int32_t window_width, int32_t sequence_number);
674 
681  void output_model(bool verbose=false);
682 
684  void output_model_defined(bool verbose=false);
686 
687 
690 
692  void normalize(bool keep_dead_states=false);
693 
697  void add_states(int32_t num_states, float64_t default_val=0);
698 
704  bool append_model(
705  CHMM* append_model, float64_t* cur_out, float64_t* app_out);
706 
711 
713  void chop(float64_t value);
714 
716  void convert_to_log();
717 
719  void init_model_random();
720 
726  void init_model_defined();
727 
729  void clear_model();
730 
732  void clear_model_defined();
733 
735  void copy_model(CHMM* l);
736 
741  void invalidate_model();
742 
746  inline bool get_status() const
747  {
748  return status;
749  }
750 
752  inline float64_t get_pseudo() const
753  {
754  return PSEUDO ;
755  }
756 
758  inline void set_pseudo(float64_t pseudo)
759  {
760  PSEUDO=pseudo ;
761  }
762 
763 #ifdef USE_HMMPARALLEL_STRUCTURES
764  static void* bw_dim_prefetch(void * params);
765  static void* bw_single_dim_prefetch(void * params);
766  static void* vit_dim_prefetch(void * params);
767 #endif
768 
769 #ifdef FIX_POS
770 
773  inline bool set_fix_pos_state(int32_t pos, T_STATES state, char value)
774  {
775  if (!model)
776  return false ;
777  model->set_fix_pos_state(pos, state, N, value) ;
778  return true ;
779  } ;
780 #endif
781 
782 
791  void set_observations(CStringFeatures<uint16_t>* obs, CHMM* hmm=NULL);
792 
796  void set_observation_nocache(CStringFeatures<uint16_t>* obs);
797 
800  {
802  return p_observations;
803  }
805 
873  bool load_definitions(FILE* file, bool verbose, bool initialize=true);
874 
910  bool load_model(FILE* file);
911 
915  bool save_model(FILE* file);
916 
920  bool save_model_derivatives(FILE* file);
921 
925  bool save_model_derivatives_bin(FILE* file);
926 
930  bool save_model_bin(FILE* file);
931 
933  bool check_model_derivatives() ;
935 
941  T_STATES* get_path(int32_t dim, float64_t& prob);
942 
946  bool save_path(FILE* file);
947 
951  bool save_path_derivatives(FILE* file);
952 
956  bool save_path_derivatives_bin(FILE* file);
957 
958 #ifdef USE_HMMDEBUG
959 
960  bool check_path_derivatives() ;
961 #endif //USE_HMMDEBUG
962 
966  bool save_likelihood_bin(FILE* file);
967 
971  bool save_likelihood(FILE* file);
973 
979 
981  inline T_STATES get_N() const { return N ; }
982 
984  inline int32_t get_M() const { return M ; }
985 
990  inline void set_q(T_STATES offset, float64_t value)
991  {
992 #ifdef HMM_DEBUG
993  if (offset>=N)
994  SG_DEBUG("index out of range in set_q(%i,%e) [%i]\n", offset,value,N)
995 #endif
996  end_state_distribution_q[offset]=value;
997  }
998 
1003  inline void set_p(T_STATES offset, float64_t value)
1004  {
1005 #ifdef HMM_DEBUG
1006  if (offset>=N)
1007  SG_DEBUG("index out of range in set_p(%i,.) [%i]\n", offset,N)
1008 #endif
1009  initial_state_distribution_p[offset]=value;
1010  }
1011 
1017  inline void set_A(T_STATES line_, T_STATES column, float64_t value)
1018  {
1019 #ifdef HMM_DEBUG
1020  if ((line_>N)||(column>N))
1021  SG_DEBUG("index out of range in set_A(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1022 #endif
1023  transition_matrix_A[line_+column*N]=value;
1024  }
1025 
1031  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
1032  {
1033 #ifdef HMM_DEBUG
1034  if ((line_>N)||(column>N))
1035  SG_DEBUG("index out of range in set_a(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1036 #endif
1037  transition_matrix_a[line_+column*N]=value; // look also best_path!
1038  }
1039 
1045  inline void set_B(T_STATES line_, uint16_t column, float64_t value)
1046  {
1047 #ifdef HMM_DEBUG
1048  if ((line_>=N)||(column>=M))
1049  SG_DEBUG("index out of range in set_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1050 #endif
1051  observation_matrix_B[line_*M+column]=value;
1052  }
1053 
1059  inline void set_b(T_STATES line_, uint16_t column, float64_t value)
1060  {
1061 #ifdef HMM_DEBUG
1062  if ((line_>=N)||(column>=M))
1063  SG_DEBUG("index out of range in set_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1064 #endif
1065  observation_matrix_b[line_*M+column]=value;
1066  }
1067 
1074  inline void set_psi(
1075  int32_t time, T_STATES state, T_STATES value, int32_t dimension)
1076  {
1077 #ifdef HMM_DEBUG
1078  if ((time>=p_observations->get_max_vector_length())||(state>N))
1079  SG_DEBUG("index out of range in set_psi(%i,%i,.) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N)
1080 #endif
1081  STATES_PER_OBSERVATION_PSI(dimension)[time*N+state]=value;
1082  }
1083 
1088  inline float64_t get_q(T_STATES offset) const
1089  {
1090 #ifdef HMM_DEBUG
1091  if (offset>=N)
1092  SG_DEBUG("index out of range in %e=get_q(%i) [%i]\n", end_state_distribution_q[offset],offset,N)
1093 #endif
1094  return end_state_distribution_q[offset];
1095  }
1096 
1101  inline float64_t get_p(T_STATES offset) const
1102  {
1103 #ifdef HMM_DEBUG
1104  if (offset>=N)
1105  SG_DEBUG("index out of range in get_p(%i,.) [%i]\n", offset,N)
1106 #endif
1107  return initial_state_distribution_p[offset];
1108  }
1109 
1115  inline float64_t get_A(T_STATES line_, T_STATES column) const
1116  {
1117 #ifdef HMM_DEBUG
1118  if ((line_>N)||(column>N))
1119  SG_DEBUG("index out of range in get_A(%i,%i) [%i,%i]\n",line_,column,N,N)
1120 #endif
1121  return transition_matrix_A[line_+column*N];
1122  }
1123 
1129  inline float64_t get_a(T_STATES line_, T_STATES column) const
1130  {
1131 #ifdef HMM_DEBUG
1132  if ((line_>N)||(column>N))
1133  SG_DEBUG("index out of range in get_a(%i,%i) [%i,%i]\n",line_,column,N,N)
1134 #endif
1135  return transition_matrix_a[line_+column*N]; // look also best_path()!
1136  }
1137 
1143  inline float64_t get_B(T_STATES line_, uint16_t column) const
1144  {
1145 #ifdef HMM_DEBUG
1146  if ((line_>=N)||(column>=M))
1147  SG_DEBUG("index out of range in get_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1148 #endif
1149  return observation_matrix_B[line_*M+column];
1150  }
1151 
1157  inline float64_t get_b(T_STATES line_, uint16_t column) const
1158  {
1159 #ifdef HMM_DEBUG
1160  if ((line_>=N)||(column>=M))
1161  SG_DEBUG("index out of range in get_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1162 #endif
1163  //SG_PRINT("idx %d\n", line_*M+column)
1164  return observation_matrix_b[line_*M+column];
1165  }
1166 
1174  int32_t time, T_STATES state, int32_t dimension) const
1175  {
1176 #ifdef HMM_DEBUG
1177  if ((time>=p_observations->get_max_vector_length())||(state>N))
1178  SG_DEBUG("index out of range in get_psi(%i,%i) [%i,%i]\n",time,state,p_observations->get_max_vector_length(),N)
1179 #endif
1180  return STATES_PER_OBSERVATION_PSI(dimension)[time*N+state];
1181  }
1182 
1184 
1186  virtual const char* get_name() const { return "HMM"; }
1187 
1188  protected:
1193 
1194  int32_t M;
1195 
1197  int32_t N;
1198 
1201 
1202  // line number during processing input files
1203  int32_t line;
1204 
1207 
1208  //train definition for HMM
1210 
1213 
1216 
1219 
1222 
1225 
1228 
1230  int32_t iterations;
1232 
1235  int32_t conv_it;
1236 
1239 
1242 
1245 
1248 
1251 
1254 
1257 
1258  // true if model is using log likelihood
1260 
1261  // true->ok, false->error
1262  bool status;
1263 
1264  // true->stolen from other HMMs, false->got own
1267 
1268 #ifdef USE_HMMPARALLEL_STRUCTURES
1269 
1270  float64_t** arrayN1 /*[parallel.get_num_threads()]*/ ;
1272  float64_t** arrayN2 /*[parallel.get_num_threads()]*/ ;
1273 #else //USE_HMMPARALLEL_STRUCTURES
1274 
1278 #endif //USE_HMMPARALLEL_STRUCTURES
1279 
1280 #ifdef USE_LOGSUMARRAY
1281 #ifdef USE_HMMPARALLEL_STRUCTURES
1282 
1283  float64_t** arrayS /*[parallel.get_num_threads()]*/;
1284 #else
1285 
1286  float64_t* arrayS;
1287 #endif // USE_HMMPARALLEL_STRUCTURES
1288 #endif // USE_LOGSUMARRAY
1289 
1290 #ifdef USE_HMMPARALLEL_STRUCTURES
1291 
1293  T_ALPHA_BETA* alpha_cache /*[parallel.get_num_threads()]*/ ;
1295  T_ALPHA_BETA* beta_cache /*[parallel.get_num_threads()]*/ ;
1296 
1298  T_STATES** states_per_observation_psi /*[parallel.get_num_threads()]*/ ;
1299 
1301  T_STATES** path /*[parallel.get_num_threads()]*/ ;
1302 
1304  bool* path_prob_updated /*[parallel.get_num_threads()]*/;
1305 
1307  int32_t* path_prob_dimension /*[parallel.get_num_threads()]*/ ;
1308 
1309 #else //USE_HMMPARALLEL_STRUCTURES
1310 
1311  T_ALPHA_BETA alpha_cache;
1313  T_ALPHA_BETA beta_cache;
1314 
1317 
1320 
1323 
1326 
1327 #endif //USE_HMMPARALLEL_STRUCTURES
1328 
1329 
1331  static const int32_t GOTN;
1333  static const int32_t GOTM;
1335  static const int32_t GOTO;
1337  static const int32_t GOTa;
1339  static const int32_t GOTb;
1341  static const int32_t GOTp;
1343  static const int32_t GOTq;
1344 
1346  static const int32_t GOTlearn_a;
1348  static const int32_t GOTlearn_b;
1350  static const int32_t GOTlearn_p;
1352  static const int32_t GOTlearn_q;
1354  static const int32_t GOTconst_a;
1356  static const int32_t GOTconst_b;
1358  static const int32_t GOTconst_p;
1360  static const int32_t GOTconst_q;
1361 
1362  public:
1367 
1370  int32_t time, int32_t state, int32_t dimension)
1371 {
1372  return forward(time, state, dimension) + backward(time, state, dimension) - model_probability(dimension);
1373 }
1374 
1377  int32_t time, int32_t state_i, int32_t state_j, int32_t dimension)
1378 {
1379  return forward(time, state_i, dimension) +
1380  backward(time+1, state_j, dimension) +
1381  get_a(state_i,state_j) + get_b(state_j,p_observations->get_feature(dimension ,time+1)) - model_probability(dimension);
1382 }
1383 
1390 
1394  T_STATES i, uint16_t j, int32_t dimension)
1395 {
1396  float64_t der=0;
1397 
1398  for (int32_t k=0; k<N; k++)
1399  {
1400  if (k!=i || p_observations->get_feature(dimension, k) != j)
1401  der+=get_b(k, p_observations->get_feature(dimension, k));
1402  }
1403 
1404  return der;
1405 }
1406 
1410 inline float64_t model_derivative_p(T_STATES i, int32_t dimension)
1411 {
1412  return backward(0,i,dimension)+get_b(i, p_observations->get_feature(dimension, 0));
1413 }
1414 
1418 inline float64_t model_derivative_q(T_STATES i, int32_t dimension)
1419 {
1420  return forward(p_observations->get_vector_length(dimension)-1,i,dimension) ;
1421 }
1422 
1424 inline float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
1425 {
1427  for (int32_t t=0; t<p_observations->get_vector_length(dimension)-1; t++)
1428  sum= CMath::logarithmic_sum(sum, forward(t, i, dimension) + backward(t+1, j, dimension) + get_b(j, p_observations->get_feature(dimension,t+1)));
1429 
1430  return sum;
1431 }
1432 
1433 
1435 inline float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
1436 {
1438  for (int32_t t=0; t<p_observations->get_vector_length(dimension); t++)
1439  {
1440  if (p_observations->get_feature(dimension,t)==j)
1441  sum= CMath::logarithmic_sum(sum, forward(t,i,dimension)+backward(t,i,dimension)-get_b(i,p_observations->get_feature(dimension,t)));
1442  }
1443  //if (sum==-CMath::INFTY)
1444  // SG_DEBUG("log derivative is -inf: dim=%i, state=%i, obs=%i\n",dimension, i, j)
1445  return sum;
1446 }
1448 
1455 
1457 inline float64_t path_derivative_p(T_STATES i, int32_t dimension)
1458 {
1459  best_path(dimension);
1460  return (i==PATH(dimension)[0]) ? (exp(-get_p(PATH(dimension)[0]))) : (0) ;
1461 }
1462 
1464 inline float64_t path_derivative_q(T_STATES i, int32_t dimension)
1465 {
1466  best_path(dimension);
1467  return (i==PATH(dimension)[p_observations->get_vector_length(dimension)-1]) ? (exp(-get_q(PATH(dimension)[p_observations->get_vector_length(dimension)-1]))) : 0 ;
1468 }
1469 
1471 inline float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
1472 {
1473  prepare_path_derivative(dimension) ;
1474  return (get_A(i,j)==0) ? (0) : (get_A(i,j)*exp(-get_a(i,j))) ;
1475 }
1476 
1478 inline float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
1479 {
1480  prepare_path_derivative(dimension) ;
1481  return (get_B(i,j)==0) ? (0) : (get_B(i,j)*exp(-get_b(i,j))) ;
1482 }
1483 
1485 
1486 
1487 protected:
1492 
1493  bool get_numbuffer(FILE* file, char* buffer, int32_t length);
1494 
1496  void open_bracket(FILE* file);
1497 
1499  void close_bracket(FILE* file);
1500 
1502  bool comma_or_space(FILE* file);
1503 
1505  inline void error(int32_t p_line, const char* str)
1506  {
1507  if (p_line)
1508  SG_ERROR("error in line %d %s\n", p_line, str)
1509  else
1510  SG_ERROR("error %s\n", str)
1511  }
1513 
1515  inline void prepare_path_derivative(int32_t dim)
1516  {
1518  return ;
1519  int32_t i,j,t ;
1520  best_path(dim);
1521  //initialize with zeros
1522  for (i=0; i<N; i++)
1523  {
1524  for (j=0; j<N; j++)
1525  set_A(i,j, 0);
1526  for (j=0; j<M; j++)
1527  set_B(i,j, 0);
1528  }
1529 
1530  //counting occurences for A and B
1531  for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
1532  {
1533  set_A(PATH(dim)[t], PATH(dim)[t+1], get_A(PATH(dim)[t], PATH(dim)[t+1])+1);
1534  set_B(PATH(dim)[t], p_observations->get_feature(dim,t), get_B(PATH(dim)[t], p_observations->get_feature(dim,t))+1);
1535  }
1537  path_deriv_dimension=dim ;
1538  path_deriv_updated=true ;
1539  } ;
1541 
1543  inline float64_t forward(int32_t time, int32_t state, int32_t dimension)
1544  {
1545  if (time<1)
1546  time=0;
1547 
1548  if (ALPHA_CACHE(dimension).table && (dimension==ALPHA_CACHE(dimension).dimension) && ALPHA_CACHE(dimension).updated)
1549  {
1550  if (time<p_observations->get_vector_length(dimension))
1551  return ALPHA_CACHE(dimension).table[time*N+state];
1552  else
1553  return ALPHA_CACHE(dimension).sum;
1554  }
1555  else
1556  return forward_comp(time, state, dimension) ;
1557  }
1558 
1560  inline float64_t backward(int32_t time, int32_t state, int32_t dimension)
1561  {
1562  if (BETA_CACHE(dimension).table && (dimension==BETA_CACHE(dimension).dimension) && (BETA_CACHE(dimension).updated))
1563  {
1564  if (time<0)
1565  return BETA_CACHE(dimension).sum;
1566  if (time<p_observations->get_vector_length(dimension))
1567  return BETA_CACHE(dimension).table[time*N+state];
1568  else
1569  return -CMath::INFTY;
1570  }
1571  else
1572  return backward_comp(time, state, dimension) ;
1573  }
1574 
1575 };
1576 }
1577 #endif

SHOGUN Machine Learning Toolbox - Documentation