00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "distributions/HMM.h"
00012 #include "lib/Mathematics.h"
00013 #include "lib/io.h"
00014 #include "lib/config.h"
00015 #include "lib/Signal.h"
00016 #include "base/Parallel.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019
00020 #include <stdlib.h>
00021 #include <stdio.h>
00022 #include <time.h>
00023 #include <ctype.h>
00024
00025 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
00026 #define ARRAY_SIZE 65336
00027
00028 using namespace shogun;
00029
00031
00033
// Bit flags recording which sections of a HMM model/definition file have
// been successfully parsed (OR-ed together while reading the file).
const int32_t CHMM::GOTN= (1<<1);
const int32_t CHMM::GOTM= (1<<2);
const int32_t CHMM::GOTO= (1<<3);
const int32_t CHMM::GOTa= (1<<4);
const int32_t CHMM::GOTb= (1<<5);
const int32_t CHMM::GOTp= (1<<6);
const int32_t CHMM::GOTq= (1<<7);

// Same idea for the learn/const sections of a model definition file.
const int32_t CHMM::GOTlearn_a= (1<<1);
const int32_t CHMM::GOTlearn_b= (1<<2);
const int32_t CHMM::GOTlearn_p= (1<<3);
const int32_t CHMM::GOTlearn_q= (1<<4);
const int32_t CHMM::GOTconst_a= (1<<5);
const int32_t CHMM::GOTconst_b= (1<<6);
const int32_t CHMM::GOTconst_p= (1<<7);
const int32_t CHMM::GOTconst_q= (1<<8);
00050
// States of the model-file parser: one GET_* state per file section,
// plus bookkeeping states (INITIAL/ARRAYs/COMMENT/END).
enum E_STATE
{
	INITIAL,
	ARRAYs,
	GET_N,
	GET_M,
	GET_a,
	GET_b,
	GET_p,
	GET_q,
	GET_learn_a,
	GET_learn_b,
	GET_learn_p,
	GET_learn_q,
	GET_const_a,
	GET_const_b,
	GET_const_p,
	GET_const_q,
	COMMENT,
	END
};
00072
00073
#ifdef FIX_POS
// Per-position state-fixing markers (FIX_POS builds only).
const char Model::FIX_DISALLOWED=0 ;	// state must not occur at this position
const char Model::FIX_ALLOWED=1 ;	// state may occur at this position
const char Model::FIX_DEFAULT=-1 ;	// no constraint recorded
// Log-domain penalty added to disallowed states (see best_path()).
const float64_t Model::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
#endif
00080
00081 Model::Model()
00082 {
00083 const_a=new int[ARRAY_SIZE];
00084 const_b=new int[ARRAY_SIZE];
00085 const_p=new int[ARRAY_SIZE];
00086 const_q=new int[ARRAY_SIZE];
00087 const_a_val=new float64_t[ARRAY_SIZE];
00088 const_b_val=new float64_t[ARRAY_SIZE];
00089 const_p_val=new float64_t[ARRAY_SIZE];
00090 const_q_val=new float64_t[ARRAY_SIZE];
00091
00092
00093 learn_a=new int[ARRAY_SIZE];
00094 learn_b=new int[ARRAY_SIZE];
00095 learn_p=new int[ARRAY_SIZE];
00096 learn_q=new int[ARRAY_SIZE];
00097
00098 #ifdef FIX_POS
00099 fix_pos_state = new char[ARRAY_SIZE];
00100 #endif
00101 for (int32_t i=0; i<ARRAY_SIZE; i++)
00102 {
00103 const_a[i]=-1 ;
00104 const_b[i]=-1 ;
00105 const_p[i]=-1 ;
00106 const_q[i]=-1 ;
00107 const_a_val[i]=1.0 ;
00108 const_b_val[i]=1.0 ;
00109 const_p_val[i]=1.0 ;
00110 const_q_val[i]=1.0 ;
00111 learn_a[i]=-1 ;
00112 learn_b[i]=-1 ;
00113 learn_p[i]=-1 ;
00114 learn_q[i]=-1 ;
00115 #ifdef FIX_POS
00116 fix_pos_state[i] = FIX_DEFAULT ;
00117 #endif
00118 } ;
00119 }
00120
00121 Model::~Model()
00122 {
00123 delete[] const_a;
00124 delete[] const_b;
00125 delete[] const_p;
00126 delete[] const_q;
00127 delete[] const_a_val;
00128 delete[] const_b_val;
00129 delete[] const_p_val;
00130 delete[] const_q_val;
00131
00132 delete[] learn_a;
00133 delete[] learn_b;
00134 delete[] learn_p;
00135 delete[] learn_q;
00136
00137 #ifdef FIX_POS
00138 delete[] fix_pos_state;
00139 #endif
00140
00141 }
00142
// Default constructor.
// NOTE(review): only N, M, model and status are set here; the many
// pointer members (caches, scratch arrays, transition lists, model
// matrices) are left uninitialized, unlike the other constructors which
// go through initialize().  Destroying a default-constructed CHMM may
// therefore touch uninitialized pointers — confirm intended usage.
CHMM::CHMM(void)
{
	N=0;
	M=0;
	model=NULL;
	status=false;
}
00150
00151 CHMM::CHMM(CHMM* h)
00152 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00153 {
00154 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00155
00156 this->N=h->get_N();
00157 this->M=h->get_M();
00158 status=initialize(NULL, h->get_pseudo());
00159 this->copy_model(h);
00160 set_observations(h->p_observations);
00161 }
00162
00163 CHMM::CHMM(int32_t p_N, int32_t p_M, Model* p_model, float64_t p_PSEUDO)
00164 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00165 {
00166 this->N=p_N;
00167 this->M=p_M;
00168 model=NULL ;
00169
00170 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00171
00172 status=initialize(p_model, p_PSEUDO);
00173 }
00174
00175 CHMM::CHMM(
00176 CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00177 float64_t p_PSEUDO)
00178 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00179 {
00180 this->N=p_N;
00181 this->M=p_M;
00182 model=NULL ;
00183
00184 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00185
00186 initialize(model, p_PSEUDO);
00187 set_observations(obs);
00188 }
00189
/* Construct a HMM directly from dense parameter arrays supplied by the
 * caller (arrays are adopted, not copied).
 * @param p_N number of states
 * @param p   initial state distribution, length N
 * @param q   end state distribution, length N
 * @param a   NxN transition matrix
 * No observation model is set up (M=0, observation_matrix_b stays NULL). */
CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=0;
	model=NULL ;

	// this variant does not build sparse transition lists
	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — the PSEUDO member is never given a
	// value in this constructor; presumably a pseudo-count parameter (or
	// a default) was intended here — confirm.
	this->PSEUDO= PSEUDO;
	this->model= model;	// model was just set to NULL above; kept as-is
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// Adopt the caller's arrays.  NOTE(review): ownership is ambiguous —
	// free_state_dependend_arrays() only deletes the model arrays when
	// observation_matrix_b is non-NULL, so these are never freed here;
	// confirm the caller keeps ownership.
	transition_matrix_a=a ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00241
/* Construct a HMM with a sparse forward-transition structure.
 * @param p_N       number of states
 * @param p         initial state distribution, length N (adopted)
 * @param q         end state distribution, length N (adopted)
 * @param num_trans number of transitions in a_trans
 * @param a_trans   num_trans x 3 matrix in column-major layout:
 *                    a_trans[i]             = target state of transition i
 *                    a_trans[i+num_trans]   = source state of transition i
 *                    a_trans[i+2*num_trans] = transition value
 *                  Rows must be sorted by source state (asserted below). */
CHMM::CHMM(
	int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
	float64_t* a_trans)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	model=NULL ;

	this->N=p_N;
	this->M=0;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment — PSEUDO is never given a value in
	// this constructor; presumably a pseudo-count parameter was intended.
	this->PSEUDO= PSEUDO;
	this->model= model;	// redundant (model set to NULL above); kept as-is
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	trans_list_forward_cnt=NULL ;	// redundant (already NULL); kept as-is
	trans_list_len = N ;
	trans_list_forward = new T_STATES*[N] ;
	trans_list_forward_val = new float64_t*[N] ;
	trans_list_forward_cnt = new T_STATES[N] ;

	// First pass: count the transitions leaving each source state j
	// (relies on the source-state column being sorted ascending) and
	// allocate exactly-sized per-state target/value arrays.
	int32_t start_idx=0;
	for (int32_t j=0; j<N; j++)
	{
		int32_t old_start_idx=start_idx;

		while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
		{
			start_idx++;

			// sanity check: source column must be non-decreasing
			if (start_idx>1 && start_idx<num_trans)
				ASSERT(a_trans[start_idx+num_trans-1]<=
					a_trans[start_idx+num_trans]);
		}

		if (start_idx>1 && start_idx<num_trans)
			ASSERT(a_trans[start_idx+num_trans-1]<=
				a_trans[start_idx+num_trans]);

		int32_t len=start_idx-old_start_idx;
		ASSERT(len>=0);

		trans_list_forward_cnt[j] = 0 ;

		if (len>0)
		{
			trans_list_forward[j] = new T_STATES[len] ;
			trans_list_forward_val[j] = new float64_t[len] ;
		}
		else
		{
			trans_list_forward[j] = NULL;
			trans_list_forward_val[j] = NULL;
		}
	}

	// Second pass: fill the per-source-state target/value lists and the
	// per-state counters.
	for (int32_t i=0; i<num_trans; i++)
	{
		int32_t from = (int32_t)a_trans[i+num_trans] ;
		int32_t to = (int32_t)a_trans[i] ;
		float64_t val = a_trans[i+num_trans*2] ;

		ASSERT(from>=0 && from<N);
		ASSERT(to>=0 && to<N);

		trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
		trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
		trans_list_forward_cnt[from]++ ;

	} ;

	// Dense matrices stay unset; p/q are adopted from the caller.
	transition_matrix_a=NULL ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00352
00353
/* Construct a HMM from a model file: N, M and all parameter tables are
 * set up by initialize(), which parses the file via load_model(). */
CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;

	status=initialize(NULL, p_PSEUDO, model_file);
}
00361
/* Destructor: releases transition lists, state-dependent arrays, the
 * alpha/beta caches and the Viterbi path buffers.  Cache/path buffers
 * are skipped when they were borrowed from another HMM (reused_caches). */
CHMM::~CHMM()
{
	SG_UNREF(p_observations);

	// sparse transition lists (built only by the sparse constructor)
	if (trans_list_forward_cnt)
		delete[] trans_list_forward_cnt ;
	if (trans_list_backward_cnt)
		delete[] trans_list_backward_cnt ;
	if (trans_list_forward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward[i])
				delete[] trans_list_forward[i] ;
		delete[] trans_list_forward ;
	}
	if (trans_list_forward_val)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward_val[i])
				delete[] trans_list_forward_val[i] ;
		delete[] trans_list_forward_val ;
	}
	if (trans_list_backward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_backward[i])
				delete[] trans_list_backward[i] ;
		delete[] trans_list_backward ;
	} ;

	free_state_dependend_arrays();

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one alpha/beta cache per thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
		}

		delete[] alpha_cache;
		delete[] beta_cache;
		alpha_cache=NULL;
		beta_cache=NULL;
#else // USE_HMMPARALLEL_STRUCTURES
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		alpha_cache.table=NULL;
		beta_cache.table=NULL;
#endif // USE_HMMPARALLEL_STRUCTURES

		delete[] states_per_observation_psi;
		states_per_observation_psi=NULL;
	}

	// NOTE(review): arrayS is freed here under USE_LOGSUMARRAY, but the
	// allocation sites in alloc_state_dependend_arrays()/initialize() are
	// guarded by LOG_SUMARRAY — the two macro names should match; confirm.
#ifdef USE_LOGSUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] arrayS[i];
		delete[] arrayS ;
	} ;
#else //USE_HMMPARALLEL_STRUCTURES
	delete[] arrayS;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_LOGSUMARRAY

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		delete[] path_prob_updated ;
		delete[] path_prob_dimension ;
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			delete[] path[i] ;
#endif //USE_HMMPARALLEL_STRUCTURES
		delete[] path;
	}
}
00443
00444 bool CHMM::train(CFeatures* data)
00445 {
00446 if (data)
00447 {
00448 if (data->get_feature_class() != C_STRING ||
00449 data->get_feature_type() != F_WORD)
00450 {
00451 SG_ERROR("Expected features of class string type word\n");
00452 }
00453 set_observations((CStringFeatures<uint16_t>*) data);
00454 }
00455 return baum_welch_viterbi_train(BW_NORMAL);
00456 }
00457
00458 bool CHMM::alloc_state_dependend_arrays()
00459 {
00460
00461 if (!transition_matrix_a && !observation_matrix_b &&
00462 !initial_state_distribution_p && !end_state_distribution_q)
00463 {
00464 transition_matrix_a=new float64_t[N*N];
00465 observation_matrix_b=new float64_t[N*M];
00466 initial_state_distribution_p=new float64_t[N];
00467 end_state_distribution_q=new float64_t[N];
00468 init_model_random();
00469 convert_to_log();
00470 }
00471
00472 #ifdef USE_HMMPARALLEL_STRUCTURES
00473 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00474 {
00475 arrayN1[i]=new float64_t[N];
00476 arrayN2[i]=new float64_t[N];
00477 }
00478 #else //USE_HMMPARALLEL_STRUCTURES
00479 arrayN1=new float64_t[N];
00480 arrayN2=new float64_t[N];
00481 #endif //USE_HMMPARALLEL_STRUCTURES
00482
00483 #ifdef LOG_SUMARRAY
00484 #ifdef USE_HMMPARALLEL_STRUCTURES
00485 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00486 arrayS[i]=new float64_t[(int32_t)(this->N/2+1)];
00487 #else //USE_HMMPARALLEL_STRUCTURES
00488 arrayS=new float64_t[(int32_t)(this->N/2+1)];
00489 #endif //USE_HMMPARALLEL_STRUCTURES
00490 #endif //LOG_SUMARRAY
00491 transition_matrix_A=new float64_t[this->N*this->N];
00492 observation_matrix_B=new float64_t[this->N*this->M];
00493
00494 if (p_observations)
00495 {
00496 #ifdef USE_HMMPARALLEL_STRUCTURES
00497 if (alpha_cache[0].table!=NULL)
00498 #else //USE_HMMPARALLEL_STRUCTURES
00499 if (alpha_cache.table!=NULL)
00500 #endif //USE_HMMPARALLEL_STRUCTURES
00501 set_observations(p_observations);
00502 else
00503 set_observation_nocache(p_observations);
00504 SG_UNREF(p_observations);
00505 }
00506
00507 this->invalidate_model();
00508
00509 return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00510 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
00511 (initial_state_distribution_p != NULL) &&
00512 (end_state_distribution_q != NULL));
00513 }
00514
/* Release every array whose size depends on N/M (counterpart of
 * alloc_state_dependend_arrays()). */
void CHMM::free_state_dependend_arrays()
{
#ifdef USE_HMMPARALLEL_STRUCTURES
	// per-thread scratch rows
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		delete[] arrayN1[i];
		delete[] arrayN2[i];

		arrayN1[i]=NULL;
		arrayN2[i]=NULL;
	}
#else
	delete[] arrayN1;
	delete[] arrayN2;
	arrayN1=NULL;
	arrayN2=NULL;
#endif
	// The six model arrays are allocated together in
	// alloc_state_dependend_arrays(); observation_matrix_b doubles as the
	// "we own them" marker, since the constructors that adopt caller
	// arrays leave observation_matrix_b NULL.
	if (observation_matrix_b)
	{
		delete[] transition_matrix_A;
		delete[] observation_matrix_B;
		delete[] transition_matrix_a;
		delete[] observation_matrix_b;
		delete[] initial_state_distribution_p;
		delete[] end_state_distribution_q;
	} ;

	transition_matrix_A=NULL;
	observation_matrix_B=NULL;
	transition_matrix_a=NULL;
	observation_matrix_b=NULL;
	initial_state_distribution_p=NULL;
	end_state_distribution_q=NULL;
}
00549
00550 bool CHMM::initialize(Model* m, float64_t pseudo, FILE* modelfile)
00551 {
00552
00553 bool files_ok=true;
00554
00555 trans_list_forward = NULL ;
00556 trans_list_forward_cnt = NULL ;
00557 trans_list_forward_val = NULL ;
00558 trans_list_backward = NULL ;
00559 trans_list_backward_cnt = NULL ;
00560 trans_list_len = 0 ;
00561 mem_initialized = false ;
00562
00563 this->transition_matrix_a=NULL;
00564 this->observation_matrix_b=NULL;
00565 this->initial_state_distribution_p=NULL;
00566 this->end_state_distribution_q=NULL;
00567 this->PSEUDO= pseudo;
00568 this->model= m;
00569 this->p_observations=NULL;
00570 this->reused_caches=false;
00571
00572 #ifdef USE_HMMPARALLEL_STRUCTURES
00573 alpha_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
00574 beta_cache=new T_ALPHA_BETA[parallel->get_num_threads()] ;
00575 states_per_observation_psi=new P_STATES[parallel->get_num_threads()] ;
00576
00577 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00578 {
00579 this->alpha_cache[i].table=NULL;
00580 this->beta_cache[i].table=NULL;
00581 this->alpha_cache[i].dimension=0;
00582 this->beta_cache[i].dimension=0;
00583 this->states_per_observation_psi[i]=NULL ;
00584 }
00585
00586 #else // USE_HMMPARALLEL_STRUCTURES
00587 this->alpha_cache.table=NULL;
00588 this->beta_cache.table=NULL;
00589 this->alpha_cache.dimension=0;
00590 this->beta_cache.dimension=0;
00591 this->states_per_observation_psi=NULL ;
00592 #endif //USE_HMMPARALLEL_STRUCTURES
00593
00594 if (modelfile)
00595 files_ok= files_ok && load_model(modelfile);
00596
00597 #ifdef USE_HMMPARALLEL_STRUCTURES
00598 path_prob_updated=new bool[parallel->get_num_threads()];
00599 path_prob_dimension=new int[parallel->get_num_threads()];
00600
00601 path=new P_STATES[parallel->get_num_threads()];
00602
00603 for (int32_t i=0; i<parallel->get_num_threads(); i++)
00604 this->path[i]=NULL;
00605
00606 #else // USE_HMMPARALLEL_STRUCTURES
00607 this->path=NULL;
00608
00609 #endif //USE_HMMPARALLEL_STRUCTURES
00610
00611 #ifdef USE_HMMPARALLEL_STRUCTURES
00612 arrayN1=new float64_t*[parallel->get_num_threads()];
00613 arrayN2=new float64_t*[parallel->get_num_threads()];
00614 #endif //USE_HMMPARALLEL_STRUCTURES
00615
00616 #ifdef LOG_SUMARRAY
00617 #ifdef USE_HMMPARALLEL_STRUCTURES
00618 arrayS=new float64_t*[parallel->get_num_threads()] ;
00619 #endif // USE_HMMPARALLEL_STRUCTURES
00620 #endif //LOG_SUMARRAY
00621
00622 alloc_state_dependend_arrays();
00623
00624 this->loglikelihood=false;
00625 mem_initialized = true ;
00626 this->invalidate_model();
00627
00628 return ((files_ok) &&
00629 (transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
00630 (transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
00631 (end_state_distribution_q != NULL));
00632 }
00633
00634
00635
00636
00637
00638
/* Forward algorithm using the sparse transition lists.
 * Computes the forward variable alpha_time(state) for observation
 * sequence `dimension` via CMath::logarithmic_sum; when the alpha cache
 * is available the full table is computed and cached.  For
 * time >= sequence length the total termination sum is returned. */
float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: compute the whole table front-to-back
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: ping-pong between two scratch rows
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		// t=0: alpha_0(state) = p(state) + b_state(o_0)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p(i) + b_i(o_0)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction: only predecessors listed in trans_list_forward[j]
		// contribute to alpha_t(j)
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{
			for (int32_t j=0; j<N; j++)
			{
				register int32_t i, num = trans_list_forward_cnt[j] ;
				float64_t sum=-CMath::INFTY;
				for (i=0; i < num; i++)
				{
					int32_t ii = trans_list_forward[j][i] ;
					sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
				} ;

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap the two scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance within the cached table
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		if (time<p_observations->get_vector_length(dimension))
		{
			// alpha_time(state) from its listed predecessors
			register int32_t i, num=trans_list_forward_cnt[state];
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<num; i++)
			{
				int32_t ii = trans_list_forward[state][i] ;
				sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
			} ;

			return sum + get_b(state, p_observations->get_feature(dimension,time));
		}
		else
		{
			// termination: sum over alpha_{T-1}(i) + q(i)
			register int32_t i ;
			float64_t sum ;
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// record the finished cache and serve the request from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00738
00739
00740
00741
00742
/* Forward algorithm, dense version: iterates over all N predecessor
 * states instead of the sparse transition lists; otherwise the same
 * caching/scratch-row scheme as forward_comp().  With USE_LOGSUMARRAY
 * the inner log-sum is done pairwise via ARRAYS(dimension). */
float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cache available: compute the whole table
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// no cache: ping-pong between two scratch rows
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialization: alpha_0(i) = p(i) + b_i(o_0)
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction over t
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{
			for (int32_t j=0; j<N; j++)
			{
				register int32_t i ;
#ifdef USE_LOGSUMARRAY
				// pairwise sums into ARRAYS(dimension), then tree-reduce;
				// odd N folds in the last term separately
				for (i=0; i<(N>>1); i++)
					ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
						alpha[(i<<1)+1] + get_a((i<<1)+1,j));
				if (N%2==1)
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
						CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (i=0; i<N; i++)
					sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
#endif //USE_LOGSUMARRAY
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap the scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance within the cached table
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		if (time<p_observations->get_vector_length(dimension))
		{
			register int32_t i;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
					alpha[(i<<1)+1] + get_a((i<<1)+1,state));
			if (N%2==1)
				return get_b(state, p_observations->get_feature(dimension,time))+
					CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));

			return sum + get_b(state, p_observations->get_feature(dimension,time));
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// termination: sum over alpha_{T-1}(i) + q(i)
			register int32_t i ;
			float64_t sum ;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
					alpha[(i<<1)+1] + get_q((i<<1)+1));
			if (N%2==1)
				sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
					CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
#endif //USE_LOGSUMARRAY

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// record the finished cache and serve the request from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00870
00871
00872
00873
00874
/* Backward algorithm using the sparse transition lists.
 * Computes the backward variable beta_time(state) for observation
 * sequence `dimension`; for time<0 the full initial sum is computed and
 * (when available) stored in the beta cache. */
float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);	// make sure forward pass/caches exist

	if (BETA_CACHE(dimension).table)
	{
		// cache available: compute the full table back-to-front
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;	// force the complete recursion
	}
	else
	{
		// no cache: ping-pong between two scratch rows
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)


		// termination row: beta_{T-1}(state) = q(state)
		return get_q(state);
	else
	{
		// initialization: beta_{T-1}(i) = q(i)
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction backwards over t; only successors listed in
		// trans_list_backward[i] contribute to beta_t(i)
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j, num=trans_list_backward_cnt[i] ;
				float64_t sum=-CMath::INFTY;
				for (j=0; j<num; j++)
				{
					int32_t jj = trans_list_backward[i][j] ;
					sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
				} ;
				beta_new[i]=sum;
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap the scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// move one row towards the front of the cached table
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// beta_time(state) from its listed successors
			register int32_t j, num=trans_list_backward_cnt[state] ;
			float64_t sum=-CMath::INFTY;
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[state][j] ;
				sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
			} ;
			return sum;
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// total: sum over p(j) + b_j(o_0) + beta_0(j); cache it
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
00972
00973
/* Backward algorithm, dense version: iterates over all N successor
 * states instead of the sparse transition lists; otherwise the same
 * caching/scratch-row scheme as backward_comp().  With USE_LOGSUMARRAY
 * the inner log-sum is done pairwise via ARRAYS(dimension). */
float64_t CHMM::backward_comp_old(
	int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		forward(time, state, dimension);	// make sure forward pass/caches exist

	if (BETA_CACHE(dimension).table)
	{
		// cache available: compute the full table back-to-front
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;	// force the complete recursion
	}
	else
	{
		// no cache: ping-pong between two scratch rows
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)


		// termination row: beta_{T-1}(state) = q(state)
		return get_q(state);
	else
	{
		// initialization: beta_{T-1}(i) = q(i)
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction backwards over t
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j ;
#ifdef USE_LOGSUMARRAY
				// pairwise sums into ARRAYS(dimension), then tree-reduce
				for (j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(
						get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
						get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
				if (N%2==1)
					beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);

				beta_new[i]=sum;
#endif //USE_LOGSUMARRAY
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap the scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// move one row towards the front of the cached table
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// beta_time(state) over all successors
			register int32_t j ;
#ifdef USE_LOGSUMARRAY
			for (j=0; j<(N>>1); j++)
				ARRAYS(dimension)[j]=CMath::logarithmic_sum(
					get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
					get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
			if (N%2==1)
				return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
					CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			float64_t sum=-CMath::INFTY;
			for (j=0; j<N; j++)
				sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);

			return sum;
#endif //USE_LOGSUMARRAY
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// total: sum over p(j) + b_j(o_0) + beta_0(j); cache it
#ifdef USE_LOGSUMARRAY//AAA
				for (int32_t j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
						get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
				if (N%2==1)
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
#endif //USE_LOGSUMARRAY
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
01103
01104
01105
/// Viterbi decoding: computes the log probability of the single best state
/// path for observation sequence `dimension` and stores the path itself in
/// PATH(dimension). With dimension==-1 the average best-path log probability
/// over all sequences is computed (and cached in all_pat_prob).
/// Returns -1 on error (no observations, no psi table, dimension out of range).
float64_t CHMM::best_path(int32_t dimension)
{
	if (!p_observations)
		return -1;

	if (dimension==-1)
	{
		// average the per-sequence viterbi likelihood over all sequences,
		// caching the result for subsequent calls
		if (!all_path_prob_updated)
		{
			SG_INFO( "computing full viterbi likelihood\n") ;
			float64_t sum = 0 ;
			for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
				sum+=best_path(i) ;
			sum /= p_observations->get_num_vectors() ;
			all_pat_prob=sum ;
			all_path_prob_updated=true ;
			return sum ;
		} else
			return all_pat_prob ;
	} ;

	// psi (backtracking) storage must exist for this sequence
	if (!STATES_PER_OBSERVATION_PSI(dimension))
		return -1 ;

	if (dimension >= p_observations->get_num_vectors())
		return -1;

	// return cached result if this sequence's path was already computed
	if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
		return pat_prob;
	else
	{
		// two delta buffers, swapped every time step
		register float64_t* delta= ARRAYN2(dimension);
		register float64_t* delta_new= ARRAYN1(dimension);

		{ // initialization: delta_0(i) = log p(i) + log b_i(o_0)
			for (register int32_t i=0; i<N; i++)
			{
				delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
				set_psi(0, i, 0, dimension);
			}
		}

#ifdef USE_PATHDEBUG
		float64_t worst=-CMath::INFTY/4 ;
#endif
		// recursion: delta_t(j) = max_i(delta_{t-1}(i) + log a(i,j)) + log b_j(o_t)
		for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
		{
			register float64_t* dummy;
			register int32_t NN=N ;
			for (register int32_t j=0; j<NN; j++)
			{
				// row j of the transition matrix (log-space)
				register float64_t * matrix_a=&transition_matrix_a[j*N] ;
				register float64_t maxj=delta[0] + matrix_a[0];
				register int32_t argmax=0;

				// find predecessor state maximizing the path score
				for (register int32_t i=1; i<NN; i++)
				{
					register float64_t temp = delta[i] + matrix_a[i];

					if (temp>maxj)
					{
						maxj=temp;
						argmax=i;
					}
				}
#ifdef FIX_POS
				// optionally penalize states disallowed at position t
				if ((!model) || (model->get_fix_pos_state(t,j,NN)!=Model::FIX_DISALLOWED))
#endif
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
#ifdef FIX_POS
				else
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + Model::DISALLOWED_PENALTY;
#endif
				// remember best predecessor for backtracking
				set_psi(t, j, argmax, dimension);
			}

#ifdef USE_PATHDEBUG
			float64_t best=log(0) ;
			for (int32_t jj=0; jj<N; jj++)
				if (delta_new[jj]>best)
					best=delta_new[jj] ;

			if (best<-CMath::INFTY/2)
			{
				SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
				worst=best ;
			} ;
#endif

			// swap buffers instead of copying
			dummy=delta;
			delta=delta_new;
			delta_new=dummy;
		}

		{ // termination: add the stop-state scores and pick the best end state
			register float64_t maxj=delta[0]+get_q(0);
			register int32_t argmax=0;

			for (register int32_t i=1; i<N; i++)
			{
				register float64_t temp=delta[i]+get_q(i);

				if (temp>maxj)
				{
					maxj=temp;
					argmax=i;
				}
			}
			pat_prob=maxj;
			PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
		} ;

		{ // backtracking through the psi table reconstructs the state sequence
			for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
			{
				PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
			}
		}
		PATH_PROB_UPDATED(dimension)=true;
		PATH_PROB_DIMENSION(dimension)=dimension;
		return pat_prob ;
	}
}
01232
01233 #ifndef USE_HMMPARALLEL
01234 float64_t CHMM::model_probability_comp()
01235 {
01236
01237 mod_prob=0 ;
01238 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01239 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01240
01241 mod_prob_updated=true;
01242 return mod_prob;
01243 }
01244
01245 #else
01246
01247 float64_t CHMM::model_probability_comp()
01248 {
01249 pthread_t *threads=new pthread_t[parallel->get_num_threads()];
01250 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[parallel->get_num_threads()];
01251
01252 SG_INFO( "computing full model probablity\n");
01253 mod_prob=0;
01254
01255 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01256 {
01257 params[cpu].hmm=this ;
01258 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel->get_num_threads();
01259 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel->get_num_threads();
01260 params[cpu].p_buf=new float64_t[N];
01261 params[cpu].q_buf=new float64_t[N];
01262 params[cpu].a_buf=new float64_t[N*N];
01263 params[cpu].b_buf=new float64_t[N*M];
01264 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01265 }
01266
01267 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01268 {
01269 pthread_join(threads[cpu], NULL);
01270 mod_prob+=params[cpu].ret;
01271 }
01272
01273 for (int32_t i=0; i<parallel->get_num_threads(); i++)
01274 {
01275 delete[] params[i].p_buf;
01276 delete[] params[i].q_buf;
01277 delete[] params[i].a_buf;
01278 delete[] params[i].b_buf;
01279 }
01280
01281 delete[] threads;
01282 delete[] params;
01283
01284 mod_prob_updated=true;
01285 return mod_prob;
01286 }
01287
01288 void* CHMM::bw_dim_prefetch(void* params)
01289 {
01290 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
01291 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
01292 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
01293 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
01294 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
01295 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
01296 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
01297 ((S_BW_THREAD_PARAM*)params)->ret=0;
01298
01299 for (int32_t dim=start; dim<stop; dim++)
01300 {
01301 hmm->forward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01302 hmm->backward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01303 float64_t modprob=hmm->model_probability(dim) ;
01304 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01305 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
01306 }
01307 return NULL ;
01308 }
01309
01310 void* CHMM::bw_single_dim_prefetch(void * params)
01311 {
01312 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
01313 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01314 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->model_probability(dim);
01315 return NULL ;
01316 }
01317
01318 void* CHMM::vit_dim_prefetch(void * params)
01319 {
01320 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
01321 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01322 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->best_path(dim);
01323 return NULL ;
01324 }
01325
01326 #endif //USE_HMMPARALLEL
01327
01328
01329 #ifdef USE_HMMPARALLEL
01330
/// Compute the Baum-Welch numerator buffers for one observation sequence
/// `dim` (used by the parallel trainer). All values are in log-space and
/// already normalized by the sequence's model probability:
///   p_buf[i]     expected count of starting in state i
///   q_buf[i]     expected count of ending in state i
///   a_buf[N*i+j] expected number of i->j transitions
///   b_buf[M*i+j] expected number of emissions of symbol j in state i
void CHMM::ab_buf_comp(
	float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
	int32_t dim)
{
	int32_t i,j,t ;
	float64_t a_sum;
	float64_t b_sum;

	float64_t dimmodprob=model_probability(dim);

	for (i=0; i<N; i++)
	{
		// start/stop counts from the first/last time step posteriors
		p_buf[i]=get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob;
		q_buf[i]=forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob;

		// transition counts: xi_t(i,j) summed (log-sum) over t
		for (j=0; j<N; j++)
		{
			a_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
			{
				a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
						get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
			}
			a_buf[N*i+j]=a_sum-dimmodprob ;
		}

		// emission counts: gamma_t(i) summed over the t where symbol j occurs
		for (j=0; j<M; j++)
		{
			b_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim); t++)
			{
				if (p_observations->get_feature(dim,t)==j)
					b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
			}

			b_buf[M*i+j]=b_sum-dimmodprob ;
		}
	}
}
01375
01376
01377 void CHMM::estimate_model_baum_welch(CHMM* hmm)
01378 {
01379 int32_t i,j,cpu;
01380 float64_t fullmodprob=0;
01381
01382
01383 for (i=0; i<N; i++)
01384 {
01385 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
01386 set_p(i,log(PSEUDO));
01387 else
01388 set_p(i,hmm->get_p(i));
01389 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
01390 set_q(i,log(PSEUDO));
01391 else
01392 set_q(i,hmm->get_q(i));
01393
01394 for (j=0; j<N; j++)
01395 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01396 set_a(i,j, log(PSEUDO));
01397 else
01398 set_a(i,j,hmm->get_a(i,j));
01399 for (j=0; j<M; j++)
01400 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01401 set_b(i,j, log(PSEUDO));
01402 else
01403 set_b(i,j,hmm->get_b(i,j));
01404 }
01405 invalidate_model();
01406
01407 int32_t num_threads = parallel->get_num_threads();
01408
01409 pthread_t *threads=new pthread_t[num_threads] ;
01410 S_BW_THREAD_PARAM *params=new S_BW_THREAD_PARAM[num_threads] ;
01411
01412 if (p_observations->get_num_vectors()<num_threads)
01413 num_threads=p_observations->get_num_vectors();
01414
01415 for (cpu=0; cpu<num_threads; cpu++)
01416 {
01417 params[cpu].p_buf=new float64_t[N];
01418 params[cpu].q_buf=new float64_t[N];
01419 params[cpu].a_buf=new float64_t[N*N];
01420 params[cpu].b_buf=new float64_t[N*M];
01421
01422 params[cpu].hmm=hmm;
01423 int32_t start = p_observations->get_num_vectors()*cpu / num_threads;
01424 int32_t stop=p_observations->get_num_vectors()*(cpu+1) / num_threads;
01425
01426 if (cpu == parallel->get_num_threads()-1)
01427 stop=p_observations->get_num_vectors();
01428
01429 ASSERT(start<stop);
01430 params[cpu].dim_start=start;
01431 params[cpu].dim_stop=stop;
01432
01433 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
01434 }
01435
01436 for (cpu=0; cpu<num_threads; cpu++)
01437 {
01438 pthread_join(threads[cpu], NULL);
01439
01440 for (i=0; i<N; i++)
01441 {
01442
01443 set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01444 set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01445
01446
01447 for (j=0; j<N; j++)
01448 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01449
01450
01451 for (j=0; j<M; j++)
01452 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01453 }
01454
01455 fullmodprob+=params[cpu].ret;
01456
01457 }
01458
01459 for (cpu=0; cpu<num_threads; cpu++)
01460 {
01461 delete[] params[cpu].p_buf;
01462 delete[] params[cpu].q_buf;
01463 delete[] params[cpu].a_buf;
01464 delete[] params[cpu].b_buf;
01465 }
01466
01467 delete[] threads ;
01468 delete[] params ;
01469
01470
01471 hmm->mod_prob=fullmodprob;
01472 hmm->mod_prob_updated=true ;
01473
01474
01475 normalize();
01476 invalidate_model();
01477 }
01478
01479 #else // USE_HMMPARALLEL
01480
01481
/// One Baum-Welch (EM) re-estimation step (single-threaded variant).
/// Expected start/stop, transition and emission counts are accumulated from
/// `estimate` (the current model) into this model in log-space and finally
/// normalized. Transitions are only re-estimated along the sparse backward
/// transition lists (trans_list_backward).
/// @param estimate model to estimate from
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;		// log-space count accumulators
	float64_t dimmodprob=0;		// log-likelihood of the current sequence
	float64_t fullmodprob=0;	// sum over all sequences

	// initialize this model's parameters as numerators: log(PSEUDO) acts as
	// a pseudo-count; entries that are (near) -inf stay fixed
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// E-step + accumulation over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/stop counts from the first/last time step posteriors
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only consider transitions that actually exist
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				// xi_t(i,jj) summed (log-sum) over t
				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}

			// emission counts: gamma_t(i) where symbol j was observed
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the source model's likelihood
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated counts into proper (log) probabilities
	normalize();
	invalidate_model();
}
01566
01567
/// Legacy Baum-Welch (EM) re-estimation step. Identical to
/// estimate_model_baum_welch except that transitions are re-estimated over
/// the full dense N x N matrix instead of the sparse backward transition
/// lists.
/// @param estimate model to estimate from
void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;		// log-space count accumulators
	float64_t dimmodprob=0;		// log-likelihood of the current sequence
	float64_t fullmodprob=0;	// sum over all sequences

	// initialize this model's parameters as numerators: log(PSEUDO) acts as
	// a pseudo-count; entries that are (near) -inf stay fixed
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// E-step + accumulation over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/stop counts from the first/last time step posteriors
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// transition counts: xi_t(i,j) summed (log-sum) over t
			for (j=0; j<N; j++)
			{
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
				}
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
			}

			// emission counts: gamma_t(i) where symbol j was observed
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the source model's likelihood
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated counts into proper (log) probabilities
	normalize();
	invalidate_model();
}
01649 #endif // USE_HMMPARALLEL
01650
01651
01652
/// Baum-Welch (EM) re-estimation of the TRANSITION parameters only:
/// start/stop probabilities and transitions (along the sparse backward
/// transition lists) are re-estimated, while the emission probabilities b
/// are copied unchanged from `estimate`.
/// @param estimate model to estimate from
void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum;		// log-space transition count accumulator
	float64_t dimmodprob=0;		// log-likelihood of the current sequence
	float64_t fullmodprob=0;	// sum over all sequences

	// initialize p, q, a as numerators (log(PSEUDO) pseudo-count, -inf
	// entries stay fixed); emissions are simply carried over
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// E-step + accumulation over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// start/stop counts from the first/last time step posteriors
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only consider transitions that actually exist
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				// xi_t(i,jj) summed (log-sum) over t
				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}
		}
	}

	// cache the source model's likelihood
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated counts into proper (log) probabilities
	normalize();
	invalidate_model();
}
01719
01720
01721
01722
01723 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01724 {
01725 int32_t i,j,old_i,k,t,dim;
01726 float64_t a_sum_num, b_sum_num;
01727 float64_t a_sum_denom, b_sum_denom;
01728 float64_t dimmodprob=-CMath::INFTY;
01729 float64_t fullmodprob=0;
01730 float64_t* A=ARRAYN1(0);
01731 float64_t* B=ARRAYN2(0);
01732
01733
01734
01735 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01736 set_p(i,log(PSEUDO));
01737
01738 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01739 set_q(i,log(PSEUDO));
01740
01741 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01742 {
01743 j=model->get_learn_a(k,1);
01744 set_a(i,j, log(PSEUDO));
01745 }
01746
01747 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01748 {
01749 j=model->get_learn_b(k,1);
01750 set_b(i,j, log(PSEUDO));
01751 }
01752
01753 for (i=0; i<N; i++)
01754 {
01755 A[i]=log(PSEUDO);
01756 B[i]=log(PSEUDO);
01757 }
01758
01759 #ifdef USE_HMMPARALLEL
01760 int32_t num_threads = parallel->get_num_threads();
01761 pthread_t *threads=new pthread_t[num_threads] ;
01762 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01763
01764 if (p_observations->get_num_vectors()<num_threads)
01765 num_threads=p_observations->get_num_vectors();
01766 #endif
01767
01768
01769 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01770 {
01771 #ifdef USE_HMMPARALLEL
01772 if (dim%num_threads==0)
01773 {
01774 for (i=0; i<num_threads; i++)
01775 {
01776 if (dim+i<p_observations->get_num_vectors())
01777 {
01778 params[i].hmm=estimate ;
01779 params[i].dim=dim+i ;
01780 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (void*)¶ms[i]) ;
01781 }
01782 }
01783 for (i=0; i<num_threads; i++)
01784 {
01785 if (dim+i<p_observations->get_num_vectors())
01786 {
01787 pthread_join(threads[i], NULL);
01788 dimmodprob = params[i].prob_sum;
01789 }
01790 }
01791 }
01792 #else
01793 dimmodprob=estimate->model_probability(dim);
01794 #endif // USE_HMMPARALLEL
01795
01796
01797 fullmodprob+= dimmodprob;
01798
01799
01800 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01801 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
01802
01803 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01804 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
01805 estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );
01806
01807
01808 old_i=-1;
01809 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01810 {
01811
01812
01813 if (old_i!=i)
01814 {
01815 old_i=i;
01816 a_sum_denom=-CMath::INFTY;
01817
01818 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01819 a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01820
01821 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
01822 }
01823
01824 j=model->get_learn_a(k,1);
01825 a_sum_num=-CMath::INFTY;
01826 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01827 {
01828 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
01829 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01830 }
01831
01832 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
01833 }
01834
01835
01836 old_i=-1;
01837 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01838 {
01839
01840
01841
01842 if (old_i!=i)
01843 {
01844 old_i=i;
01845 b_sum_denom=-CMath::INFTY;
01846
01847 for (t=0; t<p_observations->get_vector_length(dim); t++)
01848 b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01849
01850 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
01851 }
01852
01853 j=model->get_learn_b(k,1);
01854 b_sum_num=-CMath::INFTY;
01855 for (t=0; t<p_observations->get_vector_length(dim); t++)
01856 {
01857 if (p_observations->get_feature(dim,t)==j)
01858 b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01859 }
01860
01861 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
01862 }
01863 }
01864 #ifdef USE_HMMPARALLEL
01865 delete[] threads ;
01866 delete[] params ;
01867 #endif
01868
01869
01870
01871 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01872 set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01873
01874 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01875 set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01876
01877 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01878 {
01879 j=model->get_learn_a(k,1);
01880 set_a(i,j, get_a(i,j) - A[i]);
01881 }
01882
01883 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01884 {
01885 j=model->get_learn_b(k,1);
01886 set_b(i,j, get_b(i,j) - B[i]);
01887 }
01888
01889
01890 estimate->mod_prob=fullmodprob;
01891 estimate->mod_prob_updated=true ;
01892
01893
01894 normalize();
01895 invalidate_model();
01896 }
01897
01898
01899 void CHMM::estimate_model_viterbi(CHMM* estimate)
01900 {
01901 int32_t i,j,t;
01902 float64_t sum;
01903 float64_t* P=ARRAYN1(0);
01904 float64_t* Q=ARRAYN2(0);
01905
01906 path_deriv_updated=false ;
01907
01908
01909 for (i=0; i<N; i++)
01910 {
01911 for (j=0; j<N; j++)
01912 set_A(i,j, PSEUDO);
01913
01914 for (j=0; j<M; j++)
01915 set_B(i,j, PSEUDO);
01916
01917 P[i]=PSEUDO;
01918 Q[i]=PSEUDO;
01919 }
01920
01921 float64_t allpatprob=0 ;
01922
01923 #ifdef USE_HMMPARALLEL
01924 int32_t num_threads = parallel->get_num_threads();
01925 pthread_t *threads=new pthread_t[num_threads] ;
01926 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
01927
01928 if (p_observations->get_num_vectors()<num_threads)
01929 num_threads=p_observations->get_num_vectors();
01930 #endif
01931
01932 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01933 {
01934
01935 #ifdef USE_HMMPARALLEL
01936 if (dim%num_threads==0)
01937 {
01938 for (i=0; i<num_threads; i++)
01939 {
01940 if (dim+i<p_observations->get_num_vectors())
01941 {
01942 params[i].hmm=estimate ;
01943 params[i].dim=dim+i ;
01944 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
01945 }
01946 }
01947 for (i=0; i<num_threads; i++)
01948 {
01949 if (dim+i<p_observations->get_num_vectors())
01950 {
01951 pthread_join(threads[i], NULL);
01952 allpatprob += params[i].prob_sum;
01953 }
01954 }
01955 }
01956 #else
01957
01958 allpatprob += estimate->best_path(dim);
01959 #endif // USE_HMMPARALLEL
01960
01961
01962 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01963 {
01964 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
01965 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01966 }
01967
01968 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
01969
01970 P[estimate->PATH(dim)[0]]++;
01971 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
01972 }
01973
01974 #ifdef USE_HMMPARALLEL
01975 delete[] threads;
01976 delete[] params;
01977 #endif
01978
01979 allpatprob/=p_observations->get_num_vectors() ;
01980 estimate->all_pat_prob=allpatprob ;
01981 estimate->all_path_prob_updated=true ;
01982
01983
01984 for (i=0; i<N; i++)
01985 {
01986 sum=0;
01987 for (j=0; j<N; j++)
01988 sum+=get_A(i,j);
01989
01990 for (j=0; j<N; j++)
01991 set_a(i,j, log(get_A(i,j)/sum));
01992 }
01993
01994
01995 for (i=0; i<N; i++)
01996 {
01997 sum=0;
01998 for (j=0; j<M; j++)
01999 sum+=get_B(i,j);
02000
02001 for (j=0; j<M; j++)
02002 set_b(i,j, log(get_B(i, j)/sum));
02003 }
02004
02005
02006 sum=0;
02007 for (i=0; i<N; i++)
02008 sum+=P[i];
02009
02010 for (i=0; i<N; i++)
02011 set_p(i, log(P[i]/sum));
02012
02013
02014 sum=0;
02015 for (i=0; i<N; i++)
02016 sum+=Q[i];
02017
02018 for (i=0; i<N; i++)
02019 set_q(i, log(Q[i]/sum));
02020
02021
02022 invalidate_model();
02023 }
02024
02025
02026 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02027 {
02028 int32_t i,j,k,t;
02029 float64_t sum;
02030 float64_t* P=ARRAYN1(0);
02031 float64_t* Q=ARRAYN2(0);
02032
02033 path_deriv_updated=false ;
02034
02035
02036 for (i=0; i<N; i++)
02037 {
02038 for (j=0; j<N; j++)
02039 set_A(i,j, PSEUDO);
02040
02041 for (j=0; j<M; j++)
02042 set_B(i,j, PSEUDO);
02043
02044 P[i]=PSEUDO;
02045 Q[i]=PSEUDO;
02046 }
02047
02048 #ifdef USE_HMMPARALLEL
02049 int32_t num_threads = parallel->get_num_threads();
02050 pthread_t *threads=new pthread_t[num_threads] ;
02051 S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;
02052 #endif
02053
02054 float64_t allpatprob=0.0 ;
02055 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02056 {
02057
02058 #ifdef USE_HMMPARALLEL
02059 if (dim%num_threads==0)
02060 {
02061 for (i=0; i<num_threads; i++)
02062 {
02063 if (dim+i<p_observations->get_num_vectors())
02064 {
02065 params[i].hmm=estimate ;
02066 params[i].dim=dim+i ;
02067 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02068 }
02069 }
02070 for (i=0; i<num_threads; i++)
02071 {
02072 if (dim+i<p_observations->get_num_vectors())
02073 {
02074 pthread_join(threads[i], NULL);
02075 allpatprob += params[i].prob_sum;
02076 }
02077 }
02078 }
02079 #else // USE_HMMPARALLEL
02080
02081 allpatprob += estimate->best_path(dim);
02082 #endif // USE_HMMPARALLEL
02083
02084
02085
02086 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02087 {
02088 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02089 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02090 }
02091
02092 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02093
02094 P[estimate->PATH(dim)[0]]++;
02095 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02096 }
02097
02098 #ifdef USE_HMMPARALLEL
02099 delete[] threads ;
02100 delete[] params ;
02101 #endif
02102
02103
02104
02105
02106 allpatprob/=p_observations->get_num_vectors() ;
02107 estimate->all_pat_prob=allpatprob ;
02108 estimate->all_path_prob_updated=true ;
02109
02110
02111
02112 for (i=0; i<N; i++)
02113 {
02114 for (j=0; j<N; j++)
02115 set_a(i,j, estimate->get_a(i,j));
02116
02117 for (j=0; j<M; j++)
02118 set_b(i,j, estimate->get_b(i,j));
02119 }
02120
02121
02122 i=0;
02123 sum=0;
02124 j=model->get_learn_a(i,0);
02125 k=i;
02126 while (model->get_learn_a(i,0)!=-1 || k<i)
02127 {
02128 if (j==model->get_learn_a(i,0))
02129 {
02130 sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02131 i++;
02132 }
02133 else
02134 {
02135 while (k<i)
02136 {
02137 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02138 k++;
02139 }
02140
02141 sum=0;
02142 j=model->get_learn_a(i,0);
02143 k=i;
02144 }
02145 }
02146
02147
02148 i=0;
02149 sum=0;
02150 j=model->get_learn_b(i,0);
02151 k=i;
02152 while (model->get_learn_b(i,0)!=-1 || k<i)
02153 {
02154 if (j==model->get_learn_b(i,0))
02155 {
02156 sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02157 i++;
02158 }
02159 else
02160 {
02161 while (k<i)
02162 {
02163 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02164 k++;
02165 }
02166
02167 sum=0;
02168 j=model->get_learn_b(i,0);
02169 k=i;
02170 }
02171 }
02172
02173 i=0;
02174 sum=0;
02175 while (model->get_learn_p(i)!=-1)
02176 {
02177 sum+=P[model->get_learn_p(i)] ;
02178 i++ ;
02179 } ;
02180 i=0 ;
02181 while (model->get_learn_p(i)!=-1)
02182 {
02183 set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02184 i++ ;
02185 } ;
02186
02187 i=0;
02188 sum=0;
02189 while (model->get_learn_q(i)!=-1)
02190 {
02191 sum+=Q[model->get_learn_q(i)] ;
02192 i++ ;
02193 } ;
02194 i=0 ;
02195 while (model->get_learn_q(i)!=-1)
02196 {
02197 set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02198 i++ ;
02199 } ;
02200
02201
02202
02203 invalidate_model();
02204 }
02205
02206
02207
/// Print a summary of the model (likelihood, sizes) and — if verbose — all
/// parameters as probabilities (exp of the stored log values), together
/// with per-row log-sum checksums that should be ~0 for a properly
/// normalized model (rows failing the 1e-5 tolerance are flagged).
void CHMM::output_model(bool verbose)
{
	int32_t i,j;
	float64_t checksum;

	// overview line: likelihood and model/observation dimensions
	SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// transitions: each row's log-sum (including the stop probability q)
		// should be ~0
		SG_INFO( "\ntransition matrix\n");
		for (i=0; i<N; i++)
		{
			checksum= get_q(i);
			for (j=0; j<N; j++)
			{
				checksum= CMath::logarithmic_sum(checksum, get_a(i,j));

				SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));

				if (j % 4 == 3)
					SG_PRINT( "\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}

		// start-state distribution
		SG_INFO( "\ndistribution of start states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_p(i));
			SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
			if (i % 4 == 3)
				SG_PRINT( "\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// stop-state distribution
		SG_INFO( "\ndistribution of terminal states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_q(i));
			SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
			if (i % 4 == 3)
				SG_INFO("\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// emission distributions, one checksum per state
		SG_INFO("\ndistribution of observations given the state\n");
		for (i=0; i<N; i++)
		{
			checksum=-CMath::INFTY;
			for (j=0; j<M; j++)
			{
				checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
				SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
				if (j % 4 == 3)
					SG_PRINT("\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}
	}
	SG_PRINT("\n");
}
02290
02291
//! Prints only those parameters that are marked as learnable in 'model'
//! (a filtered view of the full dump produced by output_model()).
// Does nothing if no parameter definition ('model') has been loaded.
void CHMM::output_model_defined(bool verbose)
{
	int32_t i,j;
	if (!model)
		return ;

	// generic info: likelihood of the current observations and model dimensions
	SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
		(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
		N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// learnable transitions; 'j' tracks the current source state
		// (learn_a column 0) so a newline is printed whenever it changes
		SG_INFO("\ntransition matrix\n");

		i=0;
		j=model->get_learn_a(i,0);
		while (model->get_learn_a(i,0)!=-1)	// learn_a list is -1-terminated
		{
			if (j!=model->get_learn_a(i,0))
			{
				j=model->get_learn_a(i,0);
				SG_PRINT("\n");
			}

			SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
			i++;
		}

		// learnable emissions, grouped the same way by state (learn_b column 0)
		SG_INFO("\n\ndistribution of observations given the state\n");
		i=0;
		j=model->get_learn_b(i,0);
		while (model->get_learn_b(i,0)!=-1)	// learn_b list is -1-terminated
		{
			if (j!=model->get_learn_b(i,0))
			{
				j=model->get_learn_b(i,0);
				SG_PRINT("\n");
			}

			SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
			i++;
		}

		SG_PRINT("\n");
	}
	SG_PRINT("\n");
}
02343
02344
02345
02346
02347 void CHMM::convert_to_log()
02348 {
02349 int32_t i,j;
02350
02351 for (i=0; i<N; i++)
02352 {
02353 if (get_p(i)!=0)
02354 set_p(i, log(get_p(i)));
02355 else
02356 set_p(i, -CMath::INFTY);;
02357 }
02358
02359 for (i=0; i<N; i++)
02360 {
02361 if (get_q(i)!=0)
02362 set_q(i, log(get_q(i)));
02363 else
02364 set_q(i, -CMath::INFTY);;
02365 }
02366
02367 for (i=0; i<N; i++)
02368 {
02369 for (j=0; j<N; j++)
02370 {
02371 if (get_a(i,j)!=0)
02372 set_a(i,j, log(get_a(i,j)));
02373 else
02374 set_a(i,j, -CMath::INFTY);
02375 }
02376 }
02377
02378 for (i=0; i<N; i++)
02379 {
02380 for (j=0; j<M; j++)
02381 {
02382 if (get_b(i,j)!=0)
02383 set_b(i,j, log(get_b(i,j)));
02384 else
02385 set_b(i,j, -CMath::INFTY);
02386 }
02387 }
02388 loglikelihood=true;
02389
02390 invalidate_model();
02391 }
02392
02393
02394 void CHMM::init_model_random()
02395 {
02396 const float64_t MIN_RAND=23e-3;
02397
02398 float64_t sum;
02399 int32_t i,j;
02400
02401
02402 for (i=0; i<N; i++)
02403 {
02404 sum=0;
02405 for (j=0; j<N; j++)
02406 {
02407 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02408
02409 sum+=get_a(i,j);
02410 }
02411
02412 for (j=0; j<N; j++)
02413 set_a(i,j, get_a(i,j)/sum);
02414 }
02415
02416
02417 sum=0;
02418 for (i=0; i<N; i++)
02419 {
02420 set_p(i, CMath::random(MIN_RAND, 1.0));
02421
02422 sum+=get_p(i);
02423 }
02424
02425 for (i=0; i<N; i++)
02426 set_p(i, get_p(i)/sum);
02427
02428
02429 sum=0;
02430 for (i=0; i<N; i++)
02431 {
02432 set_q(i, CMath::random(MIN_RAND, 1.0));
02433
02434 sum+=get_q(i);
02435 }
02436
02437 for (i=0; i<N; i++)
02438 set_q(i, get_q(i)/sum);
02439
02440
02441 for (i=0; i<N; i++)
02442 {
02443 sum=0;
02444 for (j=0; j<M; j++)
02445 {
02446 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02447
02448 sum+=get_b(i,j);
02449 }
02450
02451 for (j=0; j<M; j++)
02452 set_b(i,j, get_b(i,j)/sum);
02453 }
02454
02455
02456 invalidate_model();
02457 }
02458
02459
//! Initializes the model according to the loaded parameter definition:
//! learnable parameters get random values (normalized within each group),
//! constant parameters get their fixed values, everything else is zeroed.
// NOTE(review): values are written as plain probabilities here — presumably
// a conversion to the log domain happens elsewhere; confirm with callers.
void CHMM::init_model_defined()
{
	int32_t i,j,k,r;
	float64_t sum;
	const float64_t MIN_RAND=23e-3;	// lower bound for the random draws

	// a is zeroed by default
	for (i=0; i<N; i++)
		for (j=0; j<N; j++)
			set_a(i,j, 0);

	// p is zeroed by default
	for (i=0; i<N; i++)
		set_p(i, 0);

	// q is zeroed by default
	for (i=0; i<N; i++)
		set_q(i, 0);

	// b is zeroed by default
	for (i=0; i<N; i++)
		for (j=0; j<M; j++)
			set_b(i,j, 0);

	// learnable transitions: learn_a entries sharing the same source state
	// (column 0) form a group; random values for the group are accumulated
	// in 'sum' while i advances, and flushed normalized when the source
	// state changes. k trails i, marking the first entry of the open group,
	// so the loop continues until the final group is flushed (k<i).
	float64_t *R=new float64_t[N] ;
	for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=i;
	j=model->get_learn_a(i,0);
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			// still inside the current group: accumulate its random mass
			sum+=R[model->get_learn_a(i,1)] ;
			i++;
		}
		else
		{
			// group finished: write its entries normalized by 'sum'
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
						R[model->get_learn_a(k,1)]/sum);
				k++;
			}
			j=model->get_learn_a(i,0);
			k=i;
			sum=0;
			// fresh random values for the next group
			for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// learnable emissions: same grouping scheme, over the M symbols
	R=new float64_t[M] ;
	for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=0 ;
	j=model->get_learn_b(i,0);
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=R[model->get_learn_b(i,1)] ;
			i++;
		}
		else
		{
			// group finished: write its entries normalized by 'sum'
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
						R[model->get_learn_b(k,1)]/sum);
				k++;
			}

			j=model->get_learn_b(i,0);
			k=i;
			sum=0;
			// fresh random values for the next group
			for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	delete[] R ; R=NULL ;

	// constant transitions keep their fixed values (lists are -1-terminated)
	i=0;
	while (model->get_const_a(i,0) != -1)
	{
		set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
		i++;
	}

	// constant emissions
	i=0;
	while (model->get_const_b(i,0) != -1)
	{
		set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
		i++;
	}

	// constant start probabilities
	i=0;
	while (model->get_const_p(i) != -1)
	{
		set_p(model->get_const_p(i), model->get_const_p_val(i));
		i++;
	}

	// q is cleared once more before the constant end probabilities are applied
	for (i=0; i<N; i++)
		set_q(i, 0.0);

	// constant end probabilities
	i=0;
	while (model->get_const_q(i) != -1)
	{
		set_q(model->get_const_q(i), model->get_const_q_val(i));
		i++;
	}

	// learnable start probabilities: draw random values, then normalize
	// over the learnable entries only
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_p(model->get_learn_p(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
		i++ ;
	} ;

	// learnable end probabilities: same two-pass draw-then-normalize scheme
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_q(model->get_learn_q(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
		i++ ;
	} ;

	// any cached quantities are stale now
	invalidate_model();
}
02613
02614 void CHMM::clear_model()
02615 {
02616 int32_t i,j;
02617 for (i=0; i<N; i++)
02618 {
02619 set_p(i, log(PSEUDO));
02620 set_q(i, log(PSEUDO));
02621
02622 for (j=0; j<N; j++)
02623 set_a(i,j, log(PSEUDO));
02624
02625 for (j=0; j<M; j++)
02626 set_b(i,j, log(PSEUDO));
02627 }
02628 }
02629
02630 void CHMM::clear_model_defined()
02631 {
02632 int32_t i,j,k;
02633
02634 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02635 set_p(j, log(PSEUDO));
02636
02637 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02638 set_q(j, log(PSEUDO));
02639
02640 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02641 {
02642 k=model->get_learn_a(i,1);
02643 set_a(j,k, log(PSEUDO));
02644 }
02645
02646 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02647 {
02648 k=model->get_learn_b(i,1);
02649 set_b(j,k, log(PSEUDO));
02650 }
02651 }
02652
02653 void CHMM::copy_model(CHMM* l)
02654 {
02655 int32_t i,j;
02656 for (i=0; i<N; i++)
02657 {
02658 set_p(i, l->get_p(i));
02659 set_q(i, l->get_q(i));
02660
02661 for (j=0; j<N; j++)
02662 set_a(i,j, l->get_a(i,j));
02663
02664 for (j=0; j<M; j++)
02665 set_b(i,j, l->get_b(i,j));
02666 }
02667 }
02668
//! Invalidates all cached quantities after a parameter change and rebuilds
//! the per-state transition lists (states reachable with probability above
//! ALMOST_NEG_INFTY) used elsewhere for forward/backward computations.
void CHMM::invalidate_model()
{
	// the model probability cache is no longer valid
	this->mod_prob=0.0;
	this->mod_prob_updated=false;

	if (mem_initialized)
	{
		// free the old transition lists (lengths refer to the previous N,
		// still held in trans_list_len at this point)
		if (trans_list_forward_cnt)
			delete[] trans_list_forward_cnt ;
		trans_list_forward_cnt=NULL ;
		if (trans_list_backward_cnt)
			delete[] trans_list_backward_cnt ;
		trans_list_backward_cnt=NULL ;
		if (trans_list_forward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_forward[i])
					delete[] trans_list_forward[i] ;
			delete[] trans_list_forward ;
			trans_list_forward=NULL ;
		}
		if (trans_list_backward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_backward[i])
					delete[] trans_list_backward[i] ;
			delete[] trans_list_backward ;
			trans_list_backward = NULL ;
		} ;

		trans_list_len = N ;
		trans_list_forward = new T_STATES*[N] ;
		trans_list_forward_cnt = new T_STATES[N] ;

		// forward lists: for each target state j collect all source states i
		// with a non-negligible transition probability a(i,j)
		for (int32_t j=0; j<N; j++)
		{
			trans_list_forward_cnt[j]= 0 ;
			trans_list_forward[j]= new T_STATES[N] ;
			for (int32_t i=0; i<N; i++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
					trans_list_forward_cnt[j]++ ;
				}
		} ;

		trans_list_backward = new T_STATES*[N] ;
		trans_list_backward_cnt = new T_STATES[N] ;

		// backward lists: for each source state i collect all target states j
		// with a non-negligible transition probability a(i,j)
		for (int32_t i=0; i<N; i++)
		{
			trans_list_backward_cnt[i]= 0 ;
			trans_list_backward[i]= new T_STATES[N] ;
			for (int32_t j=0; j<N; j++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
					trans_list_backward_cnt[i]++ ;
				}
		} ;
	} ;
	// path/probability caches are no longer valid
	this->all_pat_prob=0.0;
	this->pat_prob=0.0;
	this->path_deriv_updated=false ;
	this->path_deriv_dimension=-1 ;
	this->all_path_prob_updated=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		// invalidate the per-thread caches
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			this->alpha_cache[i].updated=false;
			this->beta_cache[i].updated=false;
			path_prob_updated[i]=false ;
			path_prob_dimension[i]=-1 ;
		} ;
	}
#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.updated=false;
	this->beta_cache.updated=false;
	this->path_prob_dimension=-1;
	this->path_prob_updated=false;

#endif // USE_HMMPARALLEL_STRUCTURES
}
02755
02756 void CHMM::open_bracket(FILE* file)
02757 {
02758 int32_t value;
02759 while (((value=fgetc(file)) != EOF) && (value!='['))
02760 {
02761 if (value=='\n')
02762 line++;
02763 }
02764
02765 if (value==EOF)
02766 error(line, "expected \"[\" in input file");
02767
02768 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02769 {
02770 if (value=='\n')
02771 line++;
02772 }
02773
02774 ungetc(value, file);
02775 }
02776
02777 void CHMM::close_bracket(FILE* file)
02778 {
02779 int32_t value;
02780 while (((value=fgetc(file)) != EOF) && (value!=']'))
02781 {
02782 if (value=='\n')
02783 line++;
02784 }
02785
02786 if (value==EOF)
02787 error(line, "expected \"]\" in input file");
02788 }
02789
02790 bool CHMM::comma_or_space(FILE* file)
02791 {
02792 int32_t value;
02793 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
02794 {
02795 if (value=='\n')
02796 line++;
02797 }
02798 if (value==']')
02799 {
02800 ungetc(value, file);
02801 SG_ERROR( "found ']' instead of ';' or ','\n") ;
02802 return false ;
02803 } ;
02804
02805 if (value==EOF)
02806 error(line, "expected \";\" or \",\" in input file");
02807
02808 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02809 {
02810 if (value=='\n')
02811 line++;
02812 }
02813 ungetc(value, file);
02814 return true ;
02815 }
02816
02817 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
02818 {
02819 signed char value;
02820
02821 while (((value=fgetc(file)) != EOF) &&
02822 !isdigit(value) && (value!='A')
02823 && (value!='C') && (value!='G') && (value!='T')
02824 && (value!='N') && (value!='n')
02825 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
02826 {
02827 if (value=='\n')
02828 line++;
02829 }
02830 if (value==']')
02831 {
02832 ungetc(value,file) ;
02833 return false ;
02834 } ;
02835 if (value!=EOF)
02836 {
02837 int32_t i=0;
02838 switch (value)
02839 {
02840 case 'A':
02841 value='0' +CAlphabet::B_A;
02842 break;
02843 case 'C':
02844 value='0' +CAlphabet::B_C;
02845 break;
02846 case 'G':
02847 value='0' +CAlphabet::B_G;
02848 break;
02849 case 'T':
02850 value='0' +CAlphabet::B_T;
02851 break;
02852 };
02853
02854 buffer[i++]=value;
02855
02856 while (((value=fgetc(file)) != EOF) &&
02857 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
02858 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
02859 || (value=='N') || (value=='n')) && (i<length))
02860 {
02861 switch (value)
02862 {
02863 case 'A':
02864 value='0' +CAlphabet::B_A;
02865 break;
02866 case 'C':
02867 value='0' +CAlphabet::B_C;
02868 break;
02869 case 'G':
02870 value='0' +CAlphabet::B_G;
02871 break;
02872 case 'T':
02873 value='0' +CAlphabet::B_T;
02874 break;
02875 case '1': case '2': case'3': case '4': case'5':
02876 case '6': case '7': case'8': case '9': case '0': break ;
02877 case '.': case 'e': case '-': break ;
02878 default:
02879 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
02880 };
02881 buffer[i++]=value;
02882 }
02883 ungetc(value, file);
02884 buffer[i]='\0';
02885
02886 return (i<=length) && (i>0);
02887 }
02888 return false;
02889 }
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909
02910
02911
02912
02913
02914
02915
02916
02917
02918
02919
02920
02921
02922
02923
02924
02925
02926 bool CHMM::load_model(FILE* file)
02927 {
02928 int32_t received_params=0;
02929
02930 bool result=false;
02931 E_STATE state=INITIAL;
02932 char buffer[1024];
02933
02934 line=1;
02935 int32_t i,j;
02936
02937 if (file)
02938 {
02939 while (state!=END)
02940 {
02941 int32_t value=fgetc(file);
02942
02943 if (value=='\n')
02944 line++;
02945 if (value==EOF)
02946 state=END;
02947
02948 switch (state)
02949 {
02950 case INITIAL:
02951 if (value=='N')
02952 {
02953 if (received_params & GOTN)
02954 error(line, "in model file: \"p double defined\"");
02955 else
02956 state=GET_N;
02957 }
02958 else if (value=='M')
02959 {
02960 if (received_params & GOTM)
02961 error(line, "in model file: \"p double defined\"");
02962 else
02963 state=GET_M;
02964 }
02965 else if (value=='%')
02966 {
02967 state=COMMENT;
02968 }
02969 case ARRAYs:
02970 if (value=='p')
02971 {
02972 if (received_params & GOTp)
02973 error(line, "in model file: \"p double defined\"");
02974 else
02975 state=GET_p;
02976 }
02977 if (value=='q')
02978 {
02979 if (received_params & GOTq)
02980 error(line, "in model file: \"q double defined\"");
02981 else
02982 state=GET_q;
02983 }
02984 else if (value=='a')
02985 {
02986 if (received_params & GOTa)
02987 error(line, "in model file: \"a double defined\"");
02988 else
02989 state=GET_a;
02990 }
02991 else if (value=='b')
02992 {
02993 if (received_params & GOTb)
02994 error(line, "in model file: \"b double defined\"");
02995 else
02996 state=GET_b;
02997 }
02998 else if (value=='%')
02999 {
03000 state=COMMENT;
03001 }
03002 break;
03003 case GET_N:
03004 if (value=='=')
03005 {
03006 if (get_numbuffer(file, buffer, 4))
03007 {
03008 this->N= atoi(buffer);
03009 received_params|=GOTN;
03010 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03011 }
03012 else
03013 state=END;
03014 }
03015 break;
03016 case GET_M:
03017 if (value=='=')
03018 {
03019 if (get_numbuffer(file, buffer, 4))
03020 {
03021 this->M= atoi(buffer);
03022 received_params|=GOTM;
03023 state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
03024 }
03025 else
03026 state=END;
03027 }
03028 break;
03029 case GET_a:
03030 if (value=='=')
03031 {
03032 float64_t f;
03033
03034 transition_matrix_a=new float64_t[N*N];
03035 open_bracket(file);
03036 for (i=0; i<this->N; i++)
03037 {
03038 open_bracket(file);
03039
03040 for (j=0; j<this->N ; j++)
03041 {
03042
03043 if (fscanf( file, "%le", &f ) != 1)
03044 error(line, "float64_t expected");
03045 else
03046 set_a(i,j, f);
03047
03048 if (j<this->N-1)
03049 comma_or_space(file);
03050 else
03051 close_bracket(file);
03052 }
03053
03054 if (i<this->N-1)
03055 comma_or_space(file);
03056 else
03057 close_bracket(file);
03058 }
03059 received_params|=GOTa;
03060 }
03061 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03062 break;
03063 case GET_b:
03064 if (value=='=')
03065 {
03066 float64_t f;
03067
03068 observation_matrix_b=new float64_t[N*M];
03069 open_bracket(file);
03070 for (i=0; i<this->N; i++)
03071 {
03072 open_bracket(file);
03073
03074 for (j=0; j<this->M ; j++)
03075 {
03076
03077 if (fscanf( file, "%le", &f ) != 1)
03078 error(line, "float64_t expected");
03079 else
03080 set_b(i,j, f);
03081
03082 if (j<this->M-1)
03083 comma_or_space(file);
03084 else
03085 close_bracket(file);
03086 }
03087
03088 if (i<this->N-1)
03089 comma_or_space(file);
03090 else
03091 close_bracket(file);
03092 }
03093 received_params|=GOTb;
03094 }
03095 state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03096 break;
03097 case GET_p:
03098 if (value=='=')
03099 {
03100 float64_t f;
03101
03102 initial_state_distribution_p=new float64_t[N];
03103 open_bracket(file);
03104 for (i=0; i<this->N ; i++)
03105 {
03106 if (fscanf( file, "%le", &f ) != 1)
03107 error(line, "float64_t expected");
03108 else
03109 set_p(i, f);
03110
03111 if (i<this->N-1)
03112 comma_or_space(file);
03113 else
03114 close_bracket(file);
03115 }
03116 received_params|=GOTp;
03117 }
03118 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03119 break;
03120 case GET_q:
03121 if (value=='=')
03122 {
03123 float64_t f;
03124
03125 end_state_distribution_q=new float64_t[N];
03126 open_bracket(file);
03127 for (i=0; i<this->N ; i++)
03128 {
03129 if (fscanf( file, "%le", &f ) != 1)
03130 error(line, "float64_t expected");
03131 else
03132 set_q(i, f);
03133
03134 if (i<this->N-1)
03135 comma_or_space(file);
03136 else
03137 close_bracket(file);
03138 }
03139 received_params|=GOTq;
03140 }
03141 state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
03142 break;
03143 case COMMENT:
03144 if (value==EOF)
03145 state=END;
03146 else if (value=='\n')
03147 {
03148 line++;
03149 state=INITIAL;
03150 }
03151 break;
03152
03153 default:
03154 break;
03155 }
03156 }
03157 result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
03158 }
03159
03160 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
03162 return result;
03163 }
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183
03184
03185
03186
03187
03188
03189
03190
03191
03192
03193
03194
03195
03196
03197
03198
03199
03200
03201
03202
03203
03204
03205
03206
03207
03208
03209
03210
03211
03212
03213
03214
03215
03216
03217
03218
03219
03220
03221
03222
03223
03224 bool CHMM::load_definitions(FILE* file, bool verbose, bool initialize)
03225 {
03226 if (model)
03227 delete model ;
03228 model=new Model();
03229
03230 int32_t received_params=0x0000000;
03231 char buffer[1024];
03232
03233 bool result=false;
03234 E_STATE state=INITIAL;
03235
03236 {
03237 model->set_learn_a(0, -1);
03238 model->set_learn_a(1, -1);
03239 model->set_const_a(0, -1);
03240 model->set_const_a(1, -1);
03241 model->set_const_a_val(0, 1.0);
03242 model->set_learn_b(0, -1);
03243 model->set_const_b(0, -1);
03244 model->set_const_b_val(0, 1.0);
03245 model->set_learn_p(0, -1);
03246 model->set_learn_q(0, -1);
03247 model->set_const_p(0, -1);
03248 model->set_const_q(0, -1);
03249 } ;
03250
03251 line=1;
03252
03253 if (file)
03254 {
03255 while (state!=END)
03256 {
03257 int32_t value=fgetc(file);
03258
03259 if (value=='\n')
03260 line++;
03261
03262 if (value==EOF)
03263 state=END;
03264
03265 switch (state)
03266 {
03267 case INITIAL:
03268 if (value=='l')
03269 {
03270 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03271 {
03272 switch(fgetc(file))
03273 {
03274 case 'a':
03275 state=GET_learn_a;
03276 break;
03277 case 'b':
03278 state=GET_learn_b;
03279 break;
03280 case 'p':
03281 state=GET_learn_p;
03282 break;
03283 case 'q':
03284 state=GET_learn_q;
03285 break;
03286 default:
03287 error(line, "a,b,p or q expected in train definition file");
03288 };
03289 }
03290 }
03291 else if (value=='c')
03292 {
03293 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03294 && fgetc(file)=='t' && fgetc(file)=='_')
03295 {
03296 switch(fgetc(file))
03297 {
03298 case 'a':
03299 state=GET_const_a;
03300 break;
03301 case 'b':
03302 state=GET_const_b;
03303 break;
03304 case 'p':
03305 state=GET_const_p;
03306 break;
03307 case 'q':
03308 state=GET_const_q;
03309 break;
03310 default:
03311 error(line, "a,b,p or q expected in train definition file");
03312 };
03313 }
03314 }
03315 else if (value=='%')
03316 {
03317 state=COMMENT;
03318 }
03319 else if (value==EOF)
03320 {
03321 state=END;
03322 }
03323 break;
03324 case GET_learn_a:
03325 if (value=='=')
03326 {
03327 open_bracket(file);
03328 bool finished=false;
03329 int32_t i=0;
03330
03331 if (verbose)
03332 SG_DEBUG( "\nlearn for transition matrix: ") ;
03333 while (!finished)
03334 {
03335 open_bracket(file);
03336
03337 if (get_numbuffer(file, buffer, 4))
03338 {
03339 value=atoi(buffer);
03340 model->set_learn_a(i++, value);
03341
03342 if (value<0)
03343 {
03344 finished=true;
03345 break;
03346 }
03347 if (value>=N)
03348 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03349 }
03350 else
03351 break;
03352
03353 comma_or_space(file);
03354
03355 if (get_numbuffer(file, buffer, 4))
03356 {
03357 value=atoi(buffer);
03358 model->set_learn_a(i++, value);
03359
03360 if (value<0)
03361 {
03362 finished=true;
03363 break;
03364 }
03365 if (value>=N)
03366 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03367
03368 }
03369 else
03370 break;
03371 close_bracket(file);
03372 }
03373 close_bracket(file);
03374 if (verbose)
03375 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03376
03377 if (finished)
03378 {
03379 received_params|=GOTlearn_a;
03380
03381 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03382 }
03383 else
03384 state=END;
03385 }
03386 break;
03387 case GET_learn_b:
03388 if (value=='=')
03389 {
03390 open_bracket(file);
03391 bool finished=false;
03392 int32_t i=0;
03393
03394 if (verbose)
03395 SG_DEBUG( "\nlearn for emission matrix: ") ;
03396
03397 while (!finished)
03398 {
03399 open_bracket(file);
03400
03401 int32_t combine=0;
03402
03403 for (int32_t j=0; j<2; j++)
03404 {
03405 if (get_numbuffer(file, buffer, 4))
03406 {
03407 value=atoi(buffer);
03408
03409 if (j==0)
03410 {
03411 model->set_learn_b(i++, value);
03412
03413 if (value<0)
03414 {
03415 finished=true;
03416 break;
03417 }
03418 if (value>=N)
03419 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03420 }
03421 else
03422 combine=value;
03423 }
03424 else
03425 break;
03426
03427 if (j<1)
03428 comma_or_space(file);
03429 else
03430 close_bracket(file);
03431 }
03432 model->set_learn_b(i++, combine);
03433 if (combine>=M)
03434
03435 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03436 }
03437 close_bracket(file);
03438 if (verbose)
03439 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03440
03441 if (finished)
03442 {
03443 received_params|=GOTlearn_b;
03444 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03445 }
03446 else
03447 state=END;
03448 }
03449 break;
03450 case GET_learn_p:
03451 if (value=='=')
03452 {
03453 open_bracket(file);
03454 bool finished=false;
03455 int32_t i=0;
03456
03457 if (verbose)
03458 SG_DEBUG( "\nlearn start states: ") ;
03459 while (!finished)
03460 {
03461 if (get_numbuffer(file, buffer, 4))
03462 {
03463 value=atoi(buffer);
03464
03465 model->set_learn_p(i++, value);
03466
03467 if (value<0)
03468 {
03469 finished=true;
03470 break;
03471 }
03472 if (value>=N)
03473 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03474 }
03475 else
03476 break;
03477
03478 comma_or_space(file);
03479 }
03480
03481 close_bracket(file);
03482 if (verbose)
03483 SG_DEBUG( "%i Entries",i-1) ;
03484
03485 if (finished)
03486 {
03487 received_params|=GOTlearn_p;
03488 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03489 }
03490 else
03491 state=END;
03492 }
03493 break;
03494 case GET_learn_q:
03495 if (value=='=')
03496 {
03497 open_bracket(file);
03498 bool finished=false;
03499 int32_t i=0;
03500
03501 if (verbose)
03502 SG_DEBUG( "\nlearn terminal states: ") ;
03503 while (!finished)
03504 {
03505 if (get_numbuffer(file, buffer, 4))
03506 {
03507 value=atoi(buffer);
03508 model->set_learn_q(i++, value);
03509
03510 if (value<0)
03511 {
03512 finished=true;
03513 break;
03514 }
03515 if (value>=N)
03516 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03517 }
03518 else
03519 break;
03520
03521 comma_or_space(file);
03522 }
03523
03524 close_bracket(file);
03525 if (verbose)
03526 SG_DEBUG( "%i Entries",i-1) ;
03527
03528 if (finished)
03529 {
03530 received_params|=GOTlearn_q;
03531 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03532 }
03533 else
03534 state=END;
03535 }
03536 break;
03537 case GET_const_a:
03538 if (value=='=')
03539 {
03540 open_bracket(file);
03541 bool finished=false;
03542 int32_t i=0;
03543
03544 if (verbose)
03545 #ifdef USE_HMMDEBUG
03546 SG_DEBUG( "\nconst for transition matrix: \n") ;
03547 #else
03548 SG_DEBUG( "\nconst for transition matrix: ") ;
03549 #endif
03550 while (!finished)
03551 {
03552 open_bracket(file);
03553
03554 if (get_numbuffer(file, buffer, 4))
03555 {
03556 value=atoi(buffer);
03557 model->set_const_a(i++, value);
03558
03559 if (value<0)
03560 {
03561 finished=true;
03562 model->set_const_a(i++, value);
03563 model->set_const_a_val((int32_t)i/2 - 1, value);
03564 break;
03565 }
03566 if (value>=N)
03567 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03568 }
03569 else
03570 break;
03571
03572 comma_or_space(file);
03573
03574 if (get_numbuffer(file, buffer, 4))
03575 {
03576 value=atoi(buffer);
03577 model->set_const_a(i++, value);
03578
03579 if (value<0)
03580 {
03581 finished=true;
03582 model->set_const_a_val((int32_t)i/2 - 1, value);
03583 break;
03584 }
03585 if (value>=N)
03586 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03587 }
03588 else
03589 break;
03590
03591 if (!comma_or_space(file))
03592 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03593 else
03594 if (get_numbuffer(file, buffer, 10))
03595 {
03596 float64_t dvalue=atof(buffer);
03597 model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03598 if (dvalue<0)
03599 {
03600 finished=true;
03601 break;
03602 }
03603 if ((dvalue>1.0) || (dvalue<0.0))
03604 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03605 }
03606 else
03607 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03608
03609 #ifdef USE_HMMDEBUG
03610 if (verbose)
03611 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03612 #endif
03613 close_bracket(file);
03614 }
03615 close_bracket(file);
03616 if (verbose)
03617 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03618
03619 if (finished)
03620 {
03621 received_params|=GOTconst_a;
03622 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03623 }
03624 else
03625 state=END;
03626 }
03627 break;
03628
03629 case GET_const_b:
03630 if (value=='=')
03631 {
03632 open_bracket(file);
03633 bool finished=false;
03634 int32_t i=0;
03635
03636 if (verbose)
03637 #ifdef USE_HMMDEBUG
03638 SG_DEBUG( "\nconst for emission matrix: \n") ;
03639 #else
03640 SG_DEBUG( "\nconst for emission matrix: ") ;
03641 #endif
03642 while (!finished)
03643 {
03644 open_bracket(file);
03645 int32_t combine=0;
03646 for (int32_t j=0; j<3; j++)
03647 {
03648 if (get_numbuffer(file, buffer, 10))
03649 {
03650 if (j==0)
03651 {
03652 value=atoi(buffer);
03653
03654 model->set_const_b(i++, value);
03655
03656 if (value<0)
03657 {
03658 finished=true;
03659
03660 break;
03661 }
03662 if (value>=N)
03663 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03664 }
03665 else if (j==2)
03666 {
03667 float64_t dvalue=atof(buffer);
03668 model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03669 if (dvalue<0)
03670 {
03671 finished=true;
03672 break;
03673 } ;
03674 if ((dvalue>1.0) || (dvalue<0.0))
03675 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03676 }
03677 else
03678 {
03679 value=atoi(buffer);
03680 combine= value;
03681 } ;
03682 }
03683 else
03684 {
03685 if (j==2)
03686 model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03687 break;
03688 } ;
03689 if (j<2)
03690 if ((!comma_or_space(file)) && (j==1))
03691 {
03692 model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03693 break ;
03694 } ;
03695 }
03696 close_bracket(file);
03697 model->set_const_b(i++, combine);
03698 if (combine>=M)
03699 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03700 #ifdef USE_HMMDEBUG
03701 if (verbose && !finished)
03702 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03703 #endif
03704 }
03705 close_bracket(file);
03706 if (verbose)
03707 SG_ERROR( "%i Entries",(int)i/2-1) ;
03708
03709 if (finished)
03710 {
03711 received_params|=GOTconst_b;
03712 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03713 }
03714 else
03715 state=END;
03716 }
03717 break;
03718 case GET_const_p:
03719 if (value=='=')
03720 {
03721 open_bracket(file);
03722 bool finished=false;
03723 int32_t i=0;
03724
03725 if (verbose)
03726 #ifdef USE_HMMDEBUG
03727 SG_DEBUG( "\nconst for start states: \n") ;
03728 #else
03729 SG_DEBUG( "\nconst for start states: ") ;
03730 #endif
03731 while (!finished)
03732 {
03733 open_bracket(file);
03734
03735 if (get_numbuffer(file, buffer, 4))
03736 {
03737 value=atoi(buffer);
03738 model->set_const_p(i, value);
03739
03740 if (value<0)
03741 {
03742 finished=true;
03743 model->set_const_p_val(i++, value);
03744 break;
03745 }
03746 if (value>=N)
03747 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03748
03749 }
03750 else
03751 break;
03752
03753 if (!comma_or_space(file))
03754 model->set_const_p_val(i++, 1.0);
03755 else
03756 if (get_numbuffer(file, buffer, 10))
03757 {
03758 float64_t dvalue=atof(buffer);
03759 model->set_const_p_val(i++, dvalue);
03760 if (dvalue<0)
03761 {
03762 finished=true;
03763 break;
03764 }
03765 if ((dvalue>1) || (dvalue<0))
03766 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
03767 }
03768 else
03769 model->set_const_p_val(i++, 1.0);
03770
03771 close_bracket(file);
03772
03773 #ifdef USE_HMMDEBUG
03774 if (verbose)
03775 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
03776 #endif
03777 }
03778 if (verbose)
03779 SG_DEBUG( "%i Entries",i-1) ;
03780
03781 close_bracket(file);
03782
03783 if (finished)
03784 {
03785 received_params|=GOTconst_p;
03786 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03787 }
03788 else
03789 state=END;
03790 }
03791 break;
03792 case GET_const_q:
03793 if (value=='=')
03794 {
03795 open_bracket(file);
03796 bool finished=false;
03797 if (verbose)
03798 #ifdef USE_HMMDEBUG
03799 SG_DEBUG( "\nconst for terminal states: \n") ;
03800 #else
03801 SG_DEBUG( "\nconst for terminal states: ") ;
03802 #endif
03803 int32_t i=0;
03804
03805 while (!finished)
03806 {
03807 open_bracket(file) ;
03808 if (get_numbuffer(file, buffer, 4))
03809 {
03810 value=atoi(buffer);
03811 model->set_const_q(i, value);
03812 if (value<0)
03813 {
03814 finished=true;
03815 model->set_const_q_val(i++, value);
03816 break;
03817 }
03818 if (value>=N)
03819 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
03820 }
03821 else
03822 break;
03823
03824 if (!comma_or_space(file))
03825 model->set_const_q_val(i++, 1.0);
03826 else
03827 if (get_numbuffer(file, buffer, 10))
03828 {
03829 float64_t dvalue=atof(buffer);
03830 model->set_const_q_val(i++, dvalue);
03831 if (dvalue<0)
03832 {
03833 finished=true;
03834 break;
03835 }
03836 if ((dvalue>1) || (dvalue<0))
03837 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
03838 }
03839 else
03840 model->set_const_q_val(i++, 1.0);
03841
03842 close_bracket(file);
03843 #ifdef USE_HMMDEBUG
03844 if (verbose)
03845 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
03846 #endif
03847 }
03848 if (verbose)
03849 SG_DEBUG( "%i Entries",i-1) ;
03850
03851 close_bracket(file);
03852
03853 if (finished)
03854 {
03855 received_params|=GOTconst_q;
03856 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03857 }
03858 else
03859 state=END;
03860 }
03861 break;
03862 case COMMENT:
03863 if (value==EOF)
03864 state=END;
03865 else if (value=='\n')
03866 state=INITIAL;
03867 break;
03868
03869 default:
03870 break;
03871 }
03872 }
03873 }
03874
03875
03876
03877
03878
03879 result=1 ;
03880 if (result)
03881 {
03882 model->sort_learn_a() ;
03883 model->sort_learn_b() ;
03884 if (initialize)
03885 {
03886 init_model_defined(); ;
03887 convert_to_log();
03888 } ;
03889 }
03890 if (verbose)
03891 SG_DEBUG( "\n") ;
03892 return result;
03893 }
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904
03905
03906
03907
03908
03909
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928
03929 bool CHMM::save_model(FILE* file)
03930 {
03931 bool result=false;
03932 int32_t i,j;
03933 const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
03934
03935 if (file)
03936 {
03937 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
03938 fprintf(file,"N=%d;\n",N);
03939 fprintf(file,"M=%d;\n",M);
03940
03941 fprintf(file,"p=[");
03942 for (i=0; i<N; i++)
03943 {
03944 if (i<N-1) {
03945 if (CMath::is_finite(get_p(i)))
03946 fprintf(file, "%e,", (double)get_p(i));
03947 else
03948 fprintf(file, "%f,", NAN_REPLACEMENT);
03949 }
03950 else {
03951 if (CMath::is_finite(get_p(i)))
03952 fprintf(file, "%e", (double)get_p(i));
03953 else
03954 fprintf(file, "%f", NAN_REPLACEMENT);
03955 }
03956 }
03957
03958 fprintf(file,"];\n\nq=[");
03959 for (i=0; i<N; i++)
03960 {
03961 if (i<N-1) {
03962 if (CMath::is_finite(get_q(i)))
03963 fprintf(file, "%e,", (double)get_q(i));
03964 else
03965 fprintf(file, "%f,", NAN_REPLACEMENT);
03966 }
03967 else {
03968 if (CMath::is_finite(get_q(i)))
03969 fprintf(file, "%e", (double)get_q(i));
03970 else
03971 fprintf(file, "%f", NAN_REPLACEMENT);
03972 }
03973 }
03974 fprintf(file,"];\n\na=[");
03975
03976 for (i=0; i<N; i++)
03977 {
03978 fprintf(file, "\t[");
03979
03980 for (j=0; j<N; j++)
03981 {
03982 if (j<N-1) {
03983 if (CMath::is_finite(get_a(i,j)))
03984 fprintf(file, "%e,", (double)get_a(i,j));
03985 else
03986 fprintf(file, "%f,", NAN_REPLACEMENT);
03987 }
03988 else {
03989 if (CMath::is_finite(get_a(i,j)))
03990 fprintf(file, "%e];\n", (double)get_a(i,j));
03991 else
03992 fprintf(file, "%f];\n", NAN_REPLACEMENT);
03993 }
03994 }
03995 }
03996
03997 fprintf(file," ];\n\nb=[");
03998
03999 for (i=0; i<N; i++)
04000 {
04001 fprintf(file, "\t[");
04002
04003 for (j=0; j<M; j++)
04004 {
04005 if (j<M-1) {
04006 if (CMath::is_finite(get_b(i,j)))
04007 fprintf(file, "%e,", (double)get_b(i,j));
04008 else
04009 fprintf(file, "%f,", NAN_REPLACEMENT);
04010 }
04011 else {
04012 if (CMath::is_finite(get_b(i,j)))
04013 fprintf(file, "%e];\n", (double)get_b(i,j));
04014 else
04015 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04016 }
04017 }
04018 }
04019 result= (fprintf(file," ];\n") == 5);
04020 }
04021
04022 return result;
04023 }
04024
04025 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04026 {
04027 T_STATES* result = NULL;
04028
04029 prob = best_path(dim);
04030 result = new T_STATES[p_observations->get_vector_length(dim)];
04031
04032 for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04033 result[i]=PATH(dim)[i];
04034
04035 return result;
04036 }
04037
04038 bool CHMM::save_path(FILE* file)
04039 {
04040 bool result=false;
04041
04042 if (file)
04043 {
04044 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04045 {
04046 if (dim%100==0)
04047 SG_PRINT( "%i..", dim) ;
04048 float64_t prob = best_path(dim);
04049 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04050 for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04051 fprintf(file,"%d ", PATH(dim)[i]);
04052 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04053 fprintf(file,"\n\n") ;
04054 }
04055 SG_DONE();
04056 result=true;
04057 }
04058
04059 return result;
04060 }
04061
04062 bool CHMM::save_likelihood_bin(FILE* file)
04063 {
04064 bool result=false;
04065
04066 if (file)
04067 {
04068 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04069 {
04070 float32_t prob= (float32_t) model_probability(dim);
04071 fwrite(&prob, sizeof(float32_t), 1, file);
04072 }
04073 result=true;
04074 }
04075
04076 return result;
04077 }
04078
04079 bool CHMM::save_likelihood(FILE* file)
04080 {
04081 bool result=false;
04082
04083 if (file)
04084 {
04085 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04086
04087 fprintf(file, "P=[");
04088 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04089 fprintf(file, "%e ", (double) model_probability(dim));
04090
04091 fprintf(file,"];");
04092 result=true;
04093 }
04094
04095 return result;
04096 }
04097
04098 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04099
/** Saves the HMM parameters to a raw float32 binary file (via FLOATWRITE).
 *
 * Two layouts exist, both introduced by an (INFTY, id) marker pair:
 *  - no parameter definitions loaded (model==NULL): id 1, then all of
 *    p, q, a and b in full
 *  - definitions loaded: id 2, then only the learnable parameters; for a
 *    and b each entry is the triple (i, j, value)
 * A trailer introduced by (INFTY, 3) stores the parameter counts and N, M.
 *
 * @param file binary output file (may be NULL; then nothing is written)
 * @return always true (I/O errors are not detected)
 */
bool CHMM::save_model_bin(FILE* file)
{
	int32_t i,j,q, num_floats=0 ;   // num_floats is incremented by FLOATWRITE
	if (!model)
	{
		// no definitions loaded: dump every parameter
		if (file)
		{
			// header: infinity marker followed by format id 1
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 1);

			// start and end state distributions
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_p(i));
			SG_INFO( "wrote %i parameters for p\n",N) ;

			for (i=0; i<N; i++)
				FLOATWRITE(file, get_q(i)) ;
			SG_INFO( "wrote %i parameters for q\n",N) ;

			// transition and emission matrices, row by row
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					FLOATWRITE(file, get_a(i,j));
			SG_INFO( "wrote %i parameters for a\n",N*N) ;

			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					FLOATWRITE(file, get_b(i,j));
			SG_INFO( "wrote %i parameters for b\n",N*M) ;

			// trailer: infinity marker, section id 3
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// counts per section followed by the model dimensions
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N*N);
			FLOATWRITE(file, (float32_t) N*M);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	}
	else
	{
		// definitions loaded: write only the learnable parameters
		if (file)
		{
			int32_t num_p, num_q, num_a, num_b ;

			// header: infinity marker followed by format id 2
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 2);

			// learnable start/end probabilities (lists are -1 terminated)
			for (i=0; model->get_learn_p(i)>=0; i++)
				FLOATWRITE(file, get_p(model->get_learn_p(i)));
			num_p=i ;
			SG_INFO( "wrote %i parameters for p\n",num_p) ;

			for (i=0; model->get_learn_q(i)>=0; i++)
				FLOATWRITE(file, get_q(model->get_learn_q(i)));
			num_q=i ;
			SG_INFO( "wrote %i parameters for q\n",num_q) ;

			// each learnable transition as the triple (i, j, a(i,j))
			// NOTE(review): this loop terminates on column 1 while the b-loop
			// below (and save_path_derivatives_bin) test column 0 -- confirm
			// this asymmetry is intentional
			for (q=0; model->get_learn_a(q,1)>=0; q++)
			{
				i=model->get_learn_a(q,0) ;
				j=model->get_learn_a(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_a(i,j));
			} ;
			num_a=q ;
			SG_INFO( "wrote %i parameters for a\n",num_a) ;

			// each learnable emission as the triple (i, j, b(i,j))
			for (q=0; model->get_learn_b(q,0)>=0; q++)
			{
				i=model->get_learn_b(q,0) ;
				j=model->get_learn_b(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_b(i,j));
			} ;
			num_b=q ;
			SG_INFO( "wrote %i parameters for b\n",num_b) ;

			// trailer: infinity marker, section id 3
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// counts per section followed by the model dimensions
			FLOATWRITE(file, (float32_t) num_p);
			FLOATWRITE(file, (float32_t) num_q);
			FLOATWRITE(file, (float32_t) num_a);
			FLOATWRITE(file, (float32_t) num_b);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	} ;
	return true ;
}
04201
/** Writes the derivatives of the best-path (Viterbi) probability with
 * respect to p, q, a and b for every observation sequence to an ascii
 * matlab-style file, one bracketed row per sequence.
 *
 * @param logfile ascii output file (must be seekable, see below)
 * @return true on success, false if logfile is NULL
 */
bool CHMM::save_path_derivatives(FILE* logfile)
{
	int32_t dim,i,j;
	float64_t prob;

	if (logfile)
	{
		// header describing the layout of each row
		fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% ............................. \n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
	}
	else
		return false ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// best_path() must run first so the path_derivative_* calls below
		// refer to the Viterbi path of this sequence
		prob=best_path(dim);

		fprintf(logfile, "[ ");

		// derivatives w.r.t. the start and end distributions
		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );

		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );

		// derivatives w.r.t. the transition and emission matrices
		for (i=0; i<N; i++)
			for (j=0; j<N; j++)
				fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );

		for (i=0; i<N; i++)
			for (j=0; j<M; j++)
				fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );

		// step back one byte to overwrite the trailing comma; this is why
		// the stream has to be seekable (a plain file, not a pipe)
		fseek(logfile,ftell(logfile)-1,SEEK_SET);
		fprintf(logfile, " ];\n");
	}

	fprintf(logfile, "];");

	return true ;
}
04249
/** Writes the derivatives of the best-path (Viterbi) probability per
 * observation sequence to a raw float32 binary file (via FLOATWRITE),
 * then appends the model itself with save_model_bin().
 *
 * If parameter definitions are loaded (model != NULL), only derivatives
 * of the learnable parameters are written; otherwise all of them.
 *
 * @param logfile binary output file
 * @return always true
 */
bool CHMM::save_path_derivatives_bin(FILE* logfile)
{
	bool result=false;
	int32_t dim,i,j,q;
	float64_t prob=0 ;
	int32_t num_floats=0 ;   // incremented by FLOATWRITE

	float64_t sum_prob=0.0 ;   // accumulated path probability (not written)
	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// console progress indicator
		if (dim%100==0)
		{
			SG_PRINT( ".") ;

		} ;

		// run Viterbi so the path_derivative_* calls refer to this sequence
		prob=best_path(dim);
		sum_prob+=prob ;

		if (!model)
		{
			// no definitions: derivative w.r.t. every parameter
			if (logfile)
			{
				// the path probability leads each record
				FLOATWRITE(logfile, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(logfile, path_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(logfile, path_derivative_b(i,j,dim));

			}
		}
		else
		{
			// definitions loaded: learnable parameters only
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));

				for (q=0; model->get_learn_a(q,0)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(logfile, path_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(logfile, path_derivative_b(i,j,dim));
				} ;
			}
		} ;
	}
	// append the current parameter values so the derivatives can be related
	// to the point they were taken at
	save_model_bin(logfile) ;

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04332
/** Writes the derivatives of the model probability P[O|lambda] with respect
 * to p, q, a and b per observation sequence to a raw float32 binary file
 * (via FLOATWRITE), then appends the model itself with save_model_bin().
 *
 * If parameter definitions are loaded (model != NULL), only derivatives of
 * the learnable parameters are written; otherwise all of them. With
 * USE_HMMPARALLEL the model probabilities of the next batch of sequences
 * are prefetched on worker threads.
 *
 * @param file binary output file
 * @return always true
 */
bool CHMM::save_model_derivatives_bin(FILE* file)
{
	bool result=false;
	int32_t dim,i,j,q ;
	int32_t num_floats=0 ;   // incremented by FLOATWRITE

	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

#ifdef USE_HMMPARALLEL
	// worker threads prefetch model probabilities in batches of num_threads
	int32_t num_threads = parallel->get_num_threads();
	pthread_t *threads=new pthread_t[num_threads] ;
	S_DIM_THREAD_PARAM *params=new S_DIM_THREAD_PARAM[num_threads] ;

	if (p_observations->get_num_vectors()<num_threads)
		num_threads=p_observations->get_num_vectors();
#endif

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// console progress indicator
		if (dim%20==0)
		{
			SG_PRINT( ".") ;

		} ;

#ifdef USE_HMMPARALLEL
		// at the start of each batch, compute the next num_threads
		// model probabilities in parallel and wait for completion
		if (dim%num_threads==0)
		{
			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=this ;
					params[i].dim=dim+i ;
					pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
				}
			}

			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
					pthread_join(threads[i], NULL);
			}
		}
#endif

		float64_t prob=model_probability(dim) ;
		if (!model)
		{
			// no definitions: derivative w.r.t. every parameter
			if (file)
			{
				// the model probability leads each record
				FLOATWRITE(file, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(file, model_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(file, model_derivative_b(i,j,dim));

				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		}
		else
		{
			// definitions loaded: learnable parameters only
			if (file)
			{
				FLOATWRITE(file, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));

				// NOTE(review): loop terminates on column 1 here but on
				// column 0 in the b-loop below -- confirm intended
				for (q=0; model->get_learn_a(q,1)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(file, model_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(file, model_derivative_b(i,j,dim));
				} ;
				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		} ;
	}
	// append the current parameter values to the derivative dump
	save_model_bin(file) ;

#ifdef USE_HMMPARALLEL
	delete[] threads ;
	delete[] params ;
#endif

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04453
/** Writes the derivatives of the model probability P[O|lambda] with respect
 * to p, q, a and b for every observation sequence to an ascii matlab-style
 * file, one bracketed row per sequence.
 *
 * @param file ascii output file (must be seekable, see below)
 * @return true on success, false if file is NULL
 */
bool CHMM::save_model_derivatives(FILE* file)
{
	bool result=false;
	int32_t dim,i,j;

	if (file)
	{
		// header describing the layout of each row
		fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% ............................. \n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");

		for (dim=0; dim<p_observations->get_num_vectors(); dim++)
		{
			fprintf(file, "[ ");

			// derivatives w.r.t. the start and end distributions
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );

			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );

			// derivatives w.r.t. the transition and emission matrices
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );

			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );

			// step back one byte to overwrite the trailing comma; this is
			// why the stream has to be seekable
			fseek(file,ftell(file)-1,SEEK_SET);
			fprintf(file, " ];\n");
		}

		fprintf(file, "];");

		result=true;
	}
	return result;
}
04501
/** Debug helper: numerically cross-checks the analytic model derivatives
 * for the emission parameters b against a symmetric finite-difference
 * quotient, summed over all observation sequences.
 *
 * Only state i=0 is checked; the corresponding checks for p, q and a
 * appear to have been removed from this function.
 *
 * NOTE(review): the per-parameter result is reported through SG_ERROR,
 * which in most builds raises/aborts after the first message -- confirm
 * this is intended for a debugging routine.
 *
 * @return always true (if SG_ERROR does not abort first)
 */
bool CHMM::check_model_derivatives_combined()
{
	// step size of the symmetric difference quotient
	const float64_t delta=5e-4 ;

	int32_t i ;

	// only the emission parameters of the first state are verified
	i=0;
	{
		for (int32_t j=0; j<M; j++)
		{
			float64_t old_b=get_b(i,j) ;

			// f(x-delta): parameters are stored in log space, so perturb
			// the linear-space value and transform back
			set_b(i,j, log(exp(old_b)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;

			// f(x+delta)
			set_b(i,j, log(exp(old_b)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());

			// numerical derivative via symmetric difference quotient
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			// restore the original parameter before computing the
			// analytic derivative
			set_b(i,j, old_b) ;
			invalidate_model() ;

			// analytic derivative accumulated over all sequences
			float64_t deriv_calc=0 ;
			for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
				if (j==1)
					SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
			} ;

			// report analytic vs. numerical derivative and relative error
			SG_ERROR( "b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	} ;
	return true ;
}
04571
/** Debug helper: numerically cross-checks the analytic model derivatives
 * for a and b (and for p and q when TEST is defined) per observation
 * sequence, using a symmetric finite-difference quotient with step delta.
 * Results are only printed via SG_DEBUG.
 *
 * NOTE(review): 'result' is initialized to false and never updated, so the
 * function always returns false -- confirm callers ignore the return value.
 *
 * @return always false (see note above)
 */
bool CHMM::check_model_derivatives()
{
	bool result=false;
	// step size of the symmetric difference quotient
	const float64_t delta=3e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// check transition parameters a(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// f(x-delta): perturb the linear-space value (parameters
				// are stored as logs)
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				// f(x+delta)
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compute the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
				invalidate_model() ;
			} ;
		} ;
		// check emission parameters b(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_b(i, j, dim));

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

#ifdef TEST
		// check start probabilities p(i)
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_p(i, dim));


			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// check end probabilities q(i)
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_q(i, dim));


			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
#endif
	}
	return result;
}
04675
#ifdef USE_HMMDEBUG
/** Debug helper: numerically cross-checks the analytic best-path (Viterbi)
 * derivatives for p, q, a and b per observation sequence, using a symmetric
 * finite-difference quotient with step delta. Results are printed via
 * SG_DEBUG only.
 *
 * NOTE(review): 'result' is initialized to false and never updated, so the
 * function always returns false -- confirm callers ignore the return value.
 *
 * @return always false (see note above)
 */
bool CHMM::check_path_derivatives()
{
	bool result=false;
	// step size of the symmetric difference quotient
	const float64_t delta=1e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// check transition parameters a(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// f(x-delta): perturb in linear space (parameters are logs)
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				// f(x+delta)
				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic derivative
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_a(i, j, dim) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
			} ;
		} ;
		// check emission parameters b(i,j)
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_b(i, j, dim);

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

		// check start probabilities p(i)
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_p(i, dim);


			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// check end probabilities q(i)
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_q(i, dim);


			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	}
	return result;
}
#endif // USE_HMMDEBUG
04778
04779
04780 void CHMM::normalize(bool keep_dead_states)
04781 {
04782 int32_t i,j;
04783 const float64_t INF=-1e10;
04784 float64_t sum_p =INF;
04785
04786 for (i=0; i<N; i++)
04787 {
04788 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
04789
04790 float64_t sum_b =INF;
04791 float64_t sum_a =get_q(i);
04792
04793 for (j=0; j<N; j++)
04794 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
04795
04796 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
04797 {
04798 for (j=0; j<N; j++)
04799 set_a(i,j, get_a(i,j)-sum_a);
04800 set_q(i, get_q(i)-sum_a);
04801 }
04802
04803 for (j=0; j<M; j++)
04804 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
04805 for (j=0; j<M; j++)
04806 set_b(i,j, get_b(i,j)-sum_b);
04807 }
04808
04809 for (i=0; i<N; i++)
04810 set_p(i, get_p(i)-sum_p);
04811
04812 invalidate_model();
04813 }
04814
04815 bool CHMM::append_model(CHMM* app_model)
04816 {
04817 bool result=false;
04818 const int32_t num_states=app_model->get_N();
04819 int32_t i,j;
04820
04821 SG_DEBUG( "cur N:%d M:%d\n", N, M);
04822 SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
04823 if (app_model->get_M() == get_M())
04824 {
04825 float64_t* n_p=new float64_t[N+num_states];
04826 float64_t* n_q=new float64_t[N+num_states];
04827 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
04828
04829 float64_t* n_b=new float64_t[(N+num_states)*M];
04830
04831
04832 for (i=0; i<N+num_states; i++)
04833 {
04834 n_p[i]=-CMath::INFTY;
04835 n_q[i]=-CMath::INFTY;
04836
04837 for (j=0; j<N+num_states; j++)
04838 n_a[(N+num_states)*i+j]=-CMath::INFTY;
04839
04840 for (j=0; j<M; j++)
04841 n_b[M*i+j]=-CMath::INFTY;
04842 }
04843
04844
04845
04846
04847
04848
04849 for (i=0; i<N; i++)
04850 {
04851 n_p[i]=get_p(i);
04852
04853 for (j=0; j<N; j++)
04854 n_a[(N+num_states)*j+i]=get_a(i,j);
04855
04856 for (j=0; j<M; j++)
04857 {
04858 n_b[M*i+j]=get_b(i,j);
04859 }
04860 }
04861
04862
04863 for (i=0; i<app_model->get_N(); i++)
04864 {
04865 n_q[i+N]=app_model->get_q(i);
04866
04867 for (j=0; j<app_model->get_N(); j++)
04868 n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
04869 for (j=0; j<app_model->get_M(); j++)
04870 n_b[M*(i+N)+j]=app_model->get_b(i,j);
04871 }
04872
04873
04874
04875 for (i=0; i<N; i++)
04876 {
04877 for (j=N; j<N+num_states; j++)
04878 n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
04879 }
04880
04881 free_state_dependend_arrays();
04882 N+=num_states;
04883
04884 alloc_state_dependend_arrays();
04885
04886
04887 delete[] initial_state_distribution_p;
04888 delete[] end_state_distribution_q;
04889 delete[] transition_matrix_a;
04890 delete[] observation_matrix_b;
04891
04892 transition_matrix_a=n_a;
04893 observation_matrix_b=n_b;
04894 initial_state_distribution_p=n_p;
04895 end_state_distribution_q=n_q;
04896
04897 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
04899 invalidate_model();
04900 }
04901 else
04902 SG_ERROR( "number of observations is different for append model, doing nothing!\n");
04903
04904 return result;
04905 }
04906
04907 bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
04908 {
04909 bool result=false;
04910 const int32_t num_states=app_model->get_N()+2;
04911 int32_t i,j;
04912
04913 if (app_model->get_M() == get_M())
04914 {
04915 float64_t* n_p=new float64_t[N+num_states];
04916 float64_t* n_q=new float64_t[N+num_states];
04917 float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
04918
04919 float64_t* n_b=new float64_t[(N+num_states)*M];
04920
04921
04922 for (i=0; i<N+num_states; i++)
04923 {
04924 n_p[i]=-CMath::INFTY;
04925 n_q[i]=-CMath::INFTY;
04926
04927 for (j=0; j<N+num_states; j++)
04928 n_a[(N+num_states)*j+i]=-CMath::INFTY;
04929
04930 for (j=0; j<M; j++)
04931 n_b[M*i+j]=-CMath::INFTY;
04932 }
04933
04934
04935
04936
04937
04938
04939 for (i=0; i<N; i++)
04940 {
04941 n_p[i]=get_p(i);
04942
04943 for (j=0; j<N; j++)
04944 n_a[(N+num_states)*j+i]=get_a(i,j);
04945
04946 for (j=0; j<M; j++)
04947 {
04948 n_b[M*i+j]=get_b(i,j);
04949 }
04950 }
04951
04952
04953 for (i=0; i<app_model->get_N(); i++)
04954 {
04955 n_q[i+N+2]=app_model->get_q(i);
04956
04957 for (j=0; j<app_model->get_N(); j++)
04958 n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
04959 for (j=0; j<app_model->get_M(); j++)
04960 n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
04961 }
04962
04963
04964
04965
04966 for (i=0; i<M; i++)
04967 {
04968 n_b[M*N+i]=cur_out[i];
04969 n_b[M*(N+1)+i]=app_out[i];
04970 }
04971
04972
04973 for (i=0; i<N+num_states; i++)
04974 {
04975
04976 if (i==N+1)
04977 n_a[(N+num_states)*i + N]=0;
04978
04979
04980
04981 if (i<N)
04982 n_a[(N+num_states)*N+i]=get_q(i);
04983
04984
04985
04986 if (i>=N+2)
04987 n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
04988 }
04989
04990 free_state_dependend_arrays();
04991 N+=num_states;
04992
04993 alloc_state_dependend_arrays();
04994
04995
04996 delete[] initial_state_distribution_p;
04997 delete[] end_state_distribution_q;
04998 delete[] transition_matrix_a;
04999 delete[] observation_matrix_b;
05000
05001 transition_matrix_a=n_a;
05002 observation_matrix_b=n_b;
05003 initial_state_distribution_p=n_p;
05004 end_state_distribution_q=n_q;
05005
05006 SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
05008 invalidate_model();
05009 }
05010
05011 return result;
05012 }
05013
05014
/** Appends num_states fresh states to the HMM.
 *
 * Existing parameters are kept. Every parameter touching a new state is set
 * (in log space, via VAL_MACRO) to log(default_value), or -- when
 * default_value==0 -- to the log of a random value in [MIN_RAND, MAX_RAND].
 * The model is re-normalized afterwards.
 *
 * @param num_states number of states to add
 * @param default_value value for the new parameters; 0 means randomize
 */
void CHMM::add_states(int32_t num_states, float64_t default_value)
{
	int32_t i,j;
	// random-init range used by VAL_MACRO when default_value==0
	const float64_t MIN_RAND=1e-2;
	const float64_t MAX_RAND=2e-1;

	// enlarged parameter arrays
	float64_t* n_p=new float64_t[N+num_states];
	float64_t* n_q=new float64_t[N+num_states];
	float64_t* n_a=new float64_t[(N+num_states)*(N+num_states)];
	float64_t* n_b=new float64_t[(N+num_states)*M];

	// copy the existing parameters
	// NOTE(review): the old block stores n_a[(N+num_states)*j+i]=get_a(i,j)
	// (indices swapped) while the new-state loops below write both
	// orientations -- confirm against transition_matrix_a's storage layout
	for (i=0; i<N; i++)
	{
		n_p[i]=get_p(i);
		n_q[i]=get_q(i);

		for (j=0; j<N; j++)
			n_a[(N+num_states)*j+i]=get_a(i,j);

		for (j=0; j<M; j++)
			n_b[M*i+j]=get_b(i,j);
	}

	// initialize all parameters involving the new states
	for (i=N; i<N+num_states; i++)
	{
		n_p[i]=VAL_MACRO;
		n_q[i]=VAL_MACRO;

		// outgoing transitions into the old states
		for (j=0; j<N; j++)
			n_a[(N+num_states)*i+j]=VAL_MACRO;

		// incoming transitions from all states (old and new)
		for (j=0; j<N+num_states; j++)
			n_a[(N+num_states)*j+i]=VAL_MACRO;

		// emission parameters of the new state
		for (j=0; j<M; j++)
			n_b[M*i+j]=VAL_MACRO;
	}
	// re-allocate the state-dependent helper arrays for the new N
	free_state_dependend_arrays();
	N+=num_states;

	alloc_state_dependend_arrays();

	// swap in the enlarged parameter arrays
	delete[] initial_state_distribution_p;
	delete[] end_state_distribution_q;
	delete[] transition_matrix_a;
	delete[] observation_matrix_b;

	transition_matrix_a=n_a;
	observation_matrix_b=n_b;
	initial_state_distribution_p=n_p;
	end_state_distribution_q=n_q;

	invalidate_model();
	normalize();
}
05074
05075 void CHMM::chop(float64_t value)
05076 {
05077 for (int32_t i=0; i<N; i++)
05078 {
05079 int32_t j;
05080
05081 if (exp(get_p(i)) < value)
05082 set_p(i, CMath::ALMOST_NEG_INFTY);
05083
05084 if (exp(get_q(i)) < value)
05085 set_q(i, CMath::ALMOST_NEG_INFTY);
05086
05087 for (j=0; j<N; j++)
05088 {
05089 if (exp(get_a(i,j)) < value)
05090 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05091 }
05092
05093 for (j=0; j<M; j++)
05094 {
05095 if (exp(get_b(i,j)) < value)
05096 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05097 }
05098 }
05099 normalize();
05100 invalidate_model();
05101 }
05102
05103 bool CHMM::linear_train(bool right_align)
05104 {
05105 if (p_observations)
05106 {
05107 int32_t histsize=(get_M()*get_N());
05108 int32_t* hist=new int32_t[histsize];
05109 int32_t* startendhist=new int32_t[get_N()];
05110 int32_t i,dim;
05111
05112 ASSERT(p_observations->get_max_vector_length()<=get_N());
05113
05114 for (i=0; i<histsize; i++)
05115 hist[i]=0;
05116
05117 for (i=0; i<get_N(); i++)
05118 startendhist[i]=0;
05119
05120 if (right_align)
05121 {
05122 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05123 {
05124 int32_t len=0;
05125 bool free_vec;
05126 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05127
05128 ASSERT(len<=get_N());
05129 startendhist[(get_N()-len)]++;
05130
05131 for (i=0;i<len;i++)
05132 hist[(get_N()-len+i)*get_M() + *obs++]++;
05133
05134 p_observations->free_feature_vector(obs, dim, free_vec);
05135 }
05136
05137 set_q(get_N()-1, 1);
05138 for (i=0; i<get_N()-1; i++)
05139 set_q(i, 0);
05140
05141 for (i=0; i<get_N(); i++)
05142 set_p(i, startendhist[i]+PSEUDO);
05143
05144 for (i=0;i<get_N();i++)
05145 {
05146 for (int32_t j=0; j<get_N(); j++)
05147 {
05148 if (i==j-1)
05149 set_a(i,j, 1);
05150 else
05151 set_a(i,j, 0);
05152 }
05153 }
05154 }
05155 else
05156 {
05157 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05158 {
05159 int32_t len=0;
05160 bool free_vec;
05161 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05162
05163 ASSERT(len<=get_N());
05164 for (i=0;i<len;i++)
05165 hist[i*get_M() + *obs++]++;
05166
05167 startendhist[len-1]++;
05168
05169 p_observations->free_feature_vector(obs, dim, free_vec);
05170 }
05171
05172 set_p(0, 1);
05173 for (i=1; i<get_N(); i++)
05174 set_p(i, 0);
05175
05176 for (i=0; i<get_N(); i++)
05177 set_q(i, startendhist[i]+PSEUDO);
05178
05179 int32_t total=p_observations->get_num_vectors();
05180
05181 for (i=0;i<get_N();i++)
05182 {
05183 total-= startendhist[i] ;
05184
05185 for (int32_t j=0; j<get_N(); j++)
05186 {
05187 if (i==j-1)
05188 set_a(i,j, total+PSEUDO);
05189 else
05190 set_a(i,j, 0);
05191 }
05192 }
05193 ASSERT(total==0);
05194 }
05195
05196 for (i=0;i<get_N();i++)
05197 {
05198 for (int32_t j=0; j<get_M(); j++)
05199 {
05200 float64_t sum=0;
05201 int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);
05202
05203 for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
05204 sum+=hist[offs+k];
05205
05206 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05207 }
05208 }
05209
05210 delete[] hist;
05211 delete[] startendhist;
05212 convert_to_log();
05213 invalidate_model();
05214 return true;
05215 }
05216 else
05217 return false;
05218 }
05219
// Attach an observation set WITHOUT allocating forward/backward caches or
// viterbi tables; existing per-observation buffers are released (unless
// they are shared with another HMM via reused_caches).
void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
{
	ASSERT(obs);
	p_observations=obs;
	SG_REF(obs);
	// NOTE(review): the previously attached p_observations is not
	// SG_UNREF'ed here -- confirm callers never replace an existing
	// observation set, otherwise this leaks a reference.

	if (obs)
		if (obs->get_num_symbols() > M)
			SG_ERROR( "number of symbols in observation (%ld) larger than M (%d)\n", (long) obs->get_num_symbols(), M);

	// drop the per-observation working memory (alpha/beta caches, viterbi
	// psi tables, best-path buffers); skipped when the buffers are borrowed
	// from another HMM (reused_caches set in set_observations(obs,lambda))
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one buffer set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	// cached probabilities/paths no longer match the new observations
	invalidate_model();
}
05261
// Attach an observation set and (re)allocate the per-observation working
// memory. When lambda is given, its caches/tables are borrowed instead of
// allocated (reused_caches marks them so they are not freed twice).
void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
{
	ASSERT(obs);
	SG_REF(obs);
	p_observations=obs;

	// NOTE(review): obs is SG_REF'ed a second time here because both
	// p_observations (above) and features (below) hold the pointer --
	// presumably intentional; confirm matching unrefs exist.
	SG_REF(obs);
	features=obs;

	SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
	SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
	SG_DEBUG("M: %d\n", M);

	if (obs)
	{
		if (obs->get_num_symbols() > M)
		{
			SG_ERROR( "number of symbols in observation (%ld) larger than M (%d)\n", (long) obs->get_num_symbols(), M);
		}
	}

	// free the old per-observation buffers unless they were borrowed from
	// another HMM
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one buffer set per worker thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			delete[] alpha_cache[i].table;
			delete[] beta_cache[i].table;
			delete[] states_per_observation_psi[i];
			delete[] path[i];

			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
			states_per_observation_psi[i]=NULL;
			path[i]=NULL;
		} ;
#else
		delete[] alpha_cache.table;
		delete[] beta_cache.table;
		delete[] states_per_observation_psi;
		delete[] path;

		alpha_cache.table=NULL;
		beta_cache.table=NULL;
		states_per_observation_psi=NULL;
		path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES
	}

	if (obs!=NULL)
	{
		// all buffers are sized by the longest observation sequence
		int32_t max_T=obs->get_max_vector_length();

		if (lambda)
		{
			// share lambda's caches/tables instead of allocating our own;
			// reused_caches prevents double-free on teardown
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				this->alpha_cache[i].table= lambda->alpha_cache[i].table;
				this->beta_cache[i].table= lambda->beta_cache[i].table;
				this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
				this->path[i]=lambda->path[i];
			} ;
#else
			this->alpha_cache.table= lambda->alpha_cache.table;
			this->beta_cache.table= lambda->beta_cache.table;
			this->states_per_observation_psi= lambda->states_per_observation_psi;
			this->path=lambda->path;
#endif //USE_HMMPARALLEL_STRUCTURES

			this->reused_caches=true;
		}
		else
		{
			this->reused_caches=false;
			// allocate viterbi psi tables (max_T x N) and best-path buffers
#ifdef USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((states_per_observation_psi[i]=new T_STATES[max_T*N])!=NULL)
					SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
				else
					SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
				path[i]=new T_STATES[max_T];
			}
#else // no USE_HMMPARALLEL_STRUCTURES
			SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
			if ((states_per_observation_psi=new T_STATES[max_T*N]) != NULL)
				SG_DONE();
			else
				SG_ERROR( "failed.\n") ;

			path=new T_STATES[max_T];
#endif // USE_HMMPARALLEL_STRUCTURES
#ifdef USE_HMMCACHE
			// allocate forward/backward (alpha/beta) caches of max_T x N
			SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);

#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				if ((alpha_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N])!=NULL)
					SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;

				if ((beta_cache[i].table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
					SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
				else
					SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
			} ;
#else // USE_HMMPARALLEL_STRUCTURES
			if ((alpha_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of alpha_cache.table failed\n") ;

			if ((beta_cache.table=new T_ALPHA_BETA_TABLE[max_T*N]) != NULL)
				SG_DEBUG( "beta_cache.table successfully allocated\n") ;
			else
				SG_ERROR( "allocation of beta_cache.table failed\n") ;

#endif // USE_HMMPARALLEL_STRUCTURES
#else // USE_HMMCACHE
			// caching disabled: leave cache tables empty
#ifdef USE_HMMPARALLEL_STRUCTURES
			for (int32_t i=0; i<parallel->get_num_threads(); i++)
			{
				alpha_cache[i].table=NULL ;
				beta_cache[i].table=NULL ;
			} ;
#else //USE_HMMPARALLEL_STRUCTURES
			alpha_cache.table=NULL ;
			beta_cache.table=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_HMMCACHE
		}
	}

	// cached probabilities/paths no longer match the new observations
	invalidate_model();
}
05406
05407 bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
05408 {
05409 if (p_observations && window_width>0 &&
05410 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05411 {
05412 int32_t min_sequence=sequence_number;
05413 int32_t max_sequence=sequence_number;
05414
05415 if (sequence_number<0)
05416 {
05417 min_sequence=0;
05418 max_sequence=p_observations->get_num_vectors();
05419 SG_INFO( "numseq: %d\n", max_sequence);
05420 }
05421
05422 SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05423 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05424 {
05425 int32_t sequence_length=0;
05426 bool free_vec;
05427 uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length, free_vec);
05428
05429 int32_t histsize=get_M();
05430 int64_t* hist=new int64_t[histsize];
05431 int32_t i,j;
05432
05433 for (i=0; i<sequence_length-window_width; i++)
05434 {
05435 for (j=0; j<histsize; j++)
05436 hist[j]=0;
05437
05438 uint16_t* ptr=&obs[i];
05439 for (j=0; j<window_width; j++)
05440 {
05441 hist[*ptr++]++;
05442 }
05443
05444 float64_t perm_entropy=0;
05445 for (j=0; j<get_M(); j++)
05446 {
05447 float64_t p=
05448 (((float64_t) hist[j])+PSEUDO)/
05449 (window_width+get_M()*PSEUDO);
05450 perm_entropy+=p*log(p);
05451 }
05452
05453 SG_PRINT( "%f\n", perm_entropy);
05454 }
05455 p_observations->free_feature_vector(obs, sequence_number, free_vec);
05456
05457 delete[] hist;
05458 }
05459 return true;
05460 }
05461 else
05462 return false;
05463 }
05464
05465 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05466 {
05467 if (num_param<N)
05468 return model_derivative_p(num_param, num_example);
05469 else if (num_param<2*N)
05470 return model_derivative_q(num_param-N, num_example);
05471 else if (num_param<N*(N+2))
05472 {
05473 int32_t k=num_param-2*N;
05474 int32_t i=k/N;
05475 int32_t j=k%N;
05476 return model_derivative_a(i,j, num_example);
05477 }
05478 else if (num_param<N*(N+2+M))
05479 {
05480 int32_t k=num_param-N*(N+2);
05481 int32_t i=k/M;
05482 int32_t j=k%M;
05483 return model_derivative_b(i,j, num_example);
05484 }
05485
05486 ASSERT(false);
05487 return -1;
05488 }
05489
05490 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05491 {
05492 if (num_param<N)
05493 return get_p(num_param);
05494 else if (num_param<2*N)
05495 return get_q(num_param-N);
05496 else if (num_param<N*(N+2))
05497 return transition_matrix_a[num_param-2*N];
05498 else if (num_param<N*(N+2+M))
05499 return observation_matrix_b[num_param-N*(N+2)];
05500
05501 ASSERT(false);
05502 return -1;
05503 }
05504
05505
05506
05507 bool CHMM::converged(float64_t x, float64_t y)
05508 {
05509 float64_t diff=y-x;
05510 float64_t absdiff=fabs(diff);
05511
05512 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05513
05514 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05515 {
05516 iteration_count=iterations;
05517 SG_INFO( "...finished\n");
05518 conv_it=5;
05519 return true;
05520 }
05521 else
05522 {
05523 if (absdiff<epsilon)
05524 conv_it--;
05525 else
05526 conv_it=5;
05527
05528 return false;
05529 }
05530 }
05531
// Train the HMM with the selected Baum-Welch/Viterbi re-estimation rule
// until converged() is satisfied or the computation is cancelled.
bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
{
	// ping-pong training: 'working' is re-estimated into 'estimate', then
	// the two pointers are swapped each iteration
	CHMM* estimate=new CHMM(this);
	CHMM* working=this;
	float64_t prob_max=-CMath::INFTY;
	float64_t prob=-CMath::INFTY;
	float64_t prob_train=CMath::ALMOST_NEG_INFTY;
	iteration_count=iterations;

	while (!converged(prob, prob_train) && (!CSignal::cancel_computations()))
	{
		CMath::swap(working, estimate);
		// previous iteration's likelihood becomes the comparison baseline
		prob=prob_train;

		switch (type) {
			case BW_NORMAL:
				working->estimate_model_baum_welch(estimate); break;
			case BW_TRANS:
				working->estimate_model_baum_welch_trans(estimate); break;
			case BW_DEFINED:
				working->estimate_model_baum_welch_defined(estimate); break;
			case VIT_NORMAL:
				working->estimate_model_viterbi(estimate); break;
			case VIT_DEFINED:
				working->estimate_model_viterbi_defined(estimate); break;
		}
		prob_train=estimate->model_probability();

		// track the best likelihood seen so far
		// NOTE(review): prob_max is never read again in this function --
		// confirm whether it was meant to feed a result/log
		if (prob_max<prob_train)
			prob_max=prob_train;
	}

	// depending on the parity of the number of swaps, the final model may
	// live in the heap-allocated copy; make sure 'this' ends up holding it
	// and the copy is deleted
	if (estimate == this)
	{
		estimate->copy_model(working);
		delete working;
	}
	else
		delete estimate;

	return true;
}