00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/distributions/HMM.h>
00012 #include <shogun/mathematics/Math.h>
00013 #include <shogun/io/SGIO.h>
00014 #include <shogun/lib/config.h>
00015 #include <shogun/lib/Signal.h>
00016 #include <shogun/base/Parallel.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/Alphabet.h>
00019
00020 #include <stdlib.h>
00021 #include <stdio.h>
00022 #include <time.h>
00023 #include <ctype.h>
00024
00025 #define VAL_MACRO log((default_value == 0) ? (CMath::random(MIN_RAND, MAX_RAND)) : default_value)
00026 #define ARRAY_SIZE 65336
00027
00028 using namespace shogun;
00029
00031
00033
// Bit flags recording which sections of a plain model definition file
// have been parsed successfully (checked against a "got everything" mask
// by the model loader).
const int32_t CHMM::GOTN= (1<<1);
const int32_t CHMM::GOTM= (1<<2);
const int32_t CHMM::GOTO= (1<<3);
const int32_t CHMM::GOTa= (1<<4);
const int32_t CHMM::GOTb= (1<<5);
const int32_t CHMM::GOTp= (1<<6);
const int32_t CHMM::GOTq= (1<<7);

// Corresponding flags for the sections of a model *definition* file that
// lists which parameters are learnable and which are held constant.
const int32_t CHMM::GOTlearn_a= (1<<1);
const int32_t CHMM::GOTlearn_b= (1<<2);
const int32_t CHMM::GOTlearn_p= (1<<3);
const int32_t CHMM::GOTlearn_q= (1<<4);
const int32_t CHMM::GOTconst_a= (1<<5);
const int32_t CHMM::GOTconst_b= (1<<6);
const int32_t CHMM::GOTconst_p= (1<<7);
const int32_t CHMM::GOTconst_q= (1<<8);
00050
// States of the model-file parser's finite-state machine: one state per
// recognizable section of the file plus bookkeeping states for comments,
// array bodies, and termination.  Do not reorder -- code may rely on the
// implicit enumerator values.
enum E_STATE
{
	INITIAL,      // before any section has been recognized
	ARRAYs,       // inside an array body
	GET_N,        // reading number of states
	GET_M,        // reading alphabet/observation size
	GET_a,        // reading transition matrix
	GET_b,        // reading emission matrix
	GET_p,        // reading initial-state distribution
	GET_q,        // reading end-state distribution
	GET_learn_a,  // reading list of learnable transitions
	GET_learn_b,  // reading list of learnable emissions
	GET_learn_p,  // reading list of learnable start probabilities
	GET_learn_q,  // reading list of learnable end probabilities
	GET_const_a,  // reading list of fixed transitions
	GET_const_b,  // reading list of fixed emissions
	GET_const_p,  // reading list of fixed start probabilities
	GET_const_q,  // reading list of fixed end probabilities
	COMMENT,      // skipping a comment
	END           // parsing finished
};
00072
00073
#ifdef FIX_POS
// Per-position state-fixing markers (only compiled in with FIX_POS):
// a position may disallow a state, force-allow it, or use the default.
const char Model::FIX_DISALLOWED=0 ;
const char Model::FIX_ALLOWED=1 ;
const char Model::FIX_DEFAULT=-1 ;
// Log-domain penalty applied to disallowed positions.
const float64_t Model::DISALLOWED_PENALTY=CMath::ALMOST_NEG_INFTY ;
#endif
00080
00081 Model::Model()
00082 {
00083 const_a=SG_MALLOC(int, ARRAY_SIZE);
00084 const_b=SG_MALLOC(int, ARRAY_SIZE);
00085 const_p=SG_MALLOC(int, ARRAY_SIZE);
00086 const_q=SG_MALLOC(int, ARRAY_SIZE);
00087 const_a_val=SG_MALLOC(float64_t, ARRAY_SIZE);
00088 const_b_val=SG_MALLOC(float64_t, ARRAY_SIZE);
00089 const_p_val=SG_MALLOC(float64_t, ARRAY_SIZE);
00090 const_q_val=SG_MALLOC(float64_t, ARRAY_SIZE);
00091
00092
00093 learn_a=SG_MALLOC(int, ARRAY_SIZE);
00094 learn_b=SG_MALLOC(int, ARRAY_SIZE);
00095 learn_p=SG_MALLOC(int, ARRAY_SIZE);
00096 learn_q=SG_MALLOC(int, ARRAY_SIZE);
00097
00098 #ifdef FIX_POS
00099 fix_pos_state = SG_MALLOC(char, ARRAY_SIZE);
00100 #endif
00101 for (int32_t i=0; i<ARRAY_SIZE; i++)
00102 {
00103 const_a[i]=-1 ;
00104 const_b[i]=-1 ;
00105 const_p[i]=-1 ;
00106 const_q[i]=-1 ;
00107 const_a_val[i]=1.0 ;
00108 const_b_val[i]=1.0 ;
00109 const_p_val[i]=1.0 ;
00110 const_q_val[i]=1.0 ;
00111 learn_a[i]=-1 ;
00112 learn_b[i]=-1 ;
00113 learn_p[i]=-1 ;
00114 learn_q[i]=-1 ;
00115 #ifdef FIX_POS
00116 fix_pos_state[i] = FIX_DEFAULT ;
00117 #endif
00118 } ;
00119 }
00120
00121 Model::~Model()
00122 {
00123 SG_FREE(const_a);
00124 SG_FREE(const_b);
00125 SG_FREE(const_p);
00126 SG_FREE(const_q);
00127 SG_FREE(const_a_val);
00128 SG_FREE(const_b_val);
00129 SG_FREE(const_p_val);
00130 SG_FREE(const_q_val);
00131
00132 SG_FREE(learn_a);
00133 SG_FREE(learn_b);
00134 SG_FREE(learn_p);
00135 SG_FREE(learn_q);
00136
00137 #ifdef FIX_POS
00138 SG_FREE(fix_pos_state);
00139 #endif
00140
00141 }
00142
00143 CHMM::CHMM(void)
00144 {
00145 N=0;
00146 M=0;
00147 model=NULL;
00148 status=false;
00149 }
00150
// Copy-style constructor: builds a new HMM with the same dimensions as
// *h, copies its parameters and attaches the same observations.
CHMM::CHMM(CHMM* h)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;

	// N and M must be set before initialize() so the tables get the right size.
	this->N=h->get_N();
	this->M=h->get_M();
	status=initialize(NULL, h->get_pseudo());
	this->copy_model(h);
	set_observations(h->p_observations);
}
00162
// Constructor for a fresh HMM with p_N states and p_M observation symbols.
// p_model optionally constrains which parameters are learnable/constant;
// p_PSEUDO is the pseudo-count used to smooth probability estimates.
CHMM::CHMM(int32_t p_N, int32_t p_M, Model* p_model, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	// dimensions must be set before initialize() allocates the tables
	this->N=p_N;
	this->M=p_M;
	model=NULL ;

	SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;

	status=initialize(p_model, p_PSEUDO);
}
00174
00175 CHMM::CHMM(
00176 CStringFeatures<uint16_t>* obs, int32_t p_N, int32_t p_M,
00177 float64_t p_PSEUDO)
00178 : CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
00179 {
00180 this->N=p_N;
00181 this->M=p_M;
00182 model=NULL ;
00183
00184 SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;
00185
00186 initialize(model, p_PSEUDO);
00187 set_observations(obs);
00188 }
00189
// Constructor from raw parameter arrays: p (start distribution),
// q (end distribution) and a (dense N x N transition matrix).  No
// emission matrix is attached (M=0), and no tables are allocated --
// the caller retains ownership of p, q and a.
CHMM::CHMM(int32_t p_N, float64_t* p, float64_t* q, float64_t* a)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	this->N=p_N;
	this->M=0;
	model=NULL ;

	// no sparse transition lists in this mode
	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment -- PSEUDO is copied onto itself and so
	// remains uninitialized here (other constructors take a p_PSEUDO
	// argument); confirm whether a default pseudo-count was intended.
	this->PSEUDO= PSEUDO;
	// NOTE(review): also a self-assignment, harmless since model was just
	// set to NULL above.
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// adopt the caller-provided arrays (not copied, not owned)
	transition_matrix_a=a ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00241
// Constructor from a sparse transition list.  a_trans is a packed
// num_trans x 3 array in column-major layout:
//   a_trans[i]               -> target state ("to")
//   a_trans[i + num_trans]   -> source state ("from"); rows must be
//                               sorted ascending by this column
//   a_trans[i + 2*num_trans] -> log transition value
// The entries are distributed into per-source forward lists
// (trans_list_forward[from][k] / trans_list_forward_val[from][k]).
// No emission matrix is attached (M=0).
CHMM::CHMM(
	int32_t p_N, float64_t* p, float64_t* q, int32_t num_trans,
	float64_t* a_trans)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	model=NULL ;

	this->N=p_N;
	this->M=0;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	// NOTE(review): self-assignment -- PSEUDO stays uninitialized here;
	// see the dense-array constructor above for the same pattern.
	this->PSEUDO= PSEUDO;
	this->model= model;
	this->p_observations=NULL;
	this->reused_caches=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache=NULL;
	this->beta_cache=NULL;
#else
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
#endif

	this->states_per_observation_psi=NULL ;
	this->path=NULL;
	arrayN1=NULL ;
	arrayN2=NULL ;

	this->loglikelihood=false;
	mem_initialized = true ;

	// build per-state forward transition lists
	trans_list_forward_cnt=NULL ;
	trans_list_len = N ;
	trans_list_forward = SG_MALLOC(T_STATES*, N);
	trans_list_forward_val = SG_MALLOC(float64_t*, N);
	trans_list_forward_cnt = SG_MALLOC(T_STATES, N);

	// first pass: count transitions per source state j (relies on the
	// "from" column being sorted) and allocate exactly-sized lists
	int32_t start_idx=0;
	for (int32_t j=0; j<N; j++)
	{
		int32_t old_start_idx=start_idx;

		while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
		{
			start_idx++;

			// verify the sort order of the "from" column
			if (start_idx>1 && start_idx<num_trans)
				ASSERT(a_trans[start_idx+num_trans-1]<=
						a_trans[start_idx+num_trans]);
		}

		// NOTE(review): this re-checks the same condition as the last loop
		// iteration above -- redundant but harmless.
		if (start_idx>1 && start_idx<num_trans)
			ASSERT(a_trans[start_idx+num_trans-1]<=
					a_trans[start_idx+num_trans]);

		int32_t len=start_idx-old_start_idx;
		ASSERT(len>=0);

		trans_list_forward_cnt[j] = 0 ;

		if (len>0)
		{
			trans_list_forward[j] = SG_MALLOC(T_STATES, len);
			trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
		}
		else
		{
			trans_list_forward[j] = NULL;
			trans_list_forward_val[j] = NULL;
		}
	}

	// second pass: fill the lists, advancing each source's counter
	for (int32_t i=0; i<num_trans; i++)
	{
		int32_t from = (int32_t)a_trans[i+num_trans] ;
		int32_t to = (int32_t)a_trans[i] ;
		float64_t val = a_trans[i+num_trans*2] ;

		ASSERT(from>=0 && from<N);
		ASSERT(to>=0 && to<N);

		trans_list_forward[from][trans_list_forward_cnt[from]]=to ;
		trans_list_forward_val[from][trans_list_forward_cnt[from]]=val ;
		trans_list_forward_cnt[from]++ ;
	} ;

	// adopt the caller-provided start/end distributions (not copied)
	transition_matrix_a=NULL ;
	observation_matrix_b=NULL ;
	initial_state_distribution_p=p ;
	end_state_distribution_q=q ;
	transition_matrix_A=NULL ;
	observation_matrix_B=NULL ;
}
00352
00353
// Constructor that loads the whole model definition from an already-open
// model file; p_PSEUDO is the smoothing pseudo-count.  'status' reflects
// whether loading and initialization succeeded.
CHMM::CHMM(FILE* model_file, float64_t p_PSEUDO)
: CDistribution(), iterations(150), epsilon(1e-4), conv_it(5)
{
	SG_INFO( "hmm is using %i separate tables\n", parallel->get_num_threads()) ;

	status=initialize(NULL, p_PSEUDO, model_file);
}
00361
// Destructor: releases observations, sparse transition lists, the
// state-dependent arrays, and (unless they were borrowed from another
// instance via reused_caches) the alpha/beta caches and path buffers.
CHMM::~CHMM()
{
	SG_UNREF(p_observations);

	// sparse transition lists (only non-NULL for the sparse constructor)
	if (trans_list_forward_cnt)
		SG_FREE(trans_list_forward_cnt);
	if (trans_list_backward_cnt)
		SG_FREE(trans_list_backward_cnt);
	if (trans_list_forward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward[i])
				SG_FREE(trans_list_forward[i]);
		SG_FREE(trans_list_forward);
	}
	if (trans_list_forward_val)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_forward_val[i])
				SG_FREE(trans_list_forward_val[i]);
		SG_FREE(trans_list_forward_val);
	}
	if (trans_list_backward)
	{
		for (int32_t i=0; i<trans_list_len; i++)
			if (trans_list_backward[i])
				SG_FREE(trans_list_backward[i]);
		SG_FREE(trans_list_backward);
	} ;

	free_state_dependend_arrays();

	// caches are only owned (and freed) when they were not reused from
	// another HMM instance
	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		// one alpha/beta cache per thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			SG_FREE(alpha_cache[i].table);
			SG_FREE(beta_cache[i].table);
			alpha_cache[i].table=NULL;
			beta_cache[i].table=NULL;
		}

		SG_FREE(alpha_cache);
		SG_FREE(beta_cache);
		alpha_cache=NULL;
		beta_cache=NULL;
#else // USE_HMMPARALLEL_STRUCTURES
		SG_FREE(alpha_cache.table);
		SG_FREE(beta_cache.table);
		alpha_cache.table=NULL;
		beta_cache.table=NULL;
#endif // USE_HMMPARALLEL_STRUCTURES

		SG_FREE(states_per_observation_psi);
		states_per_observation_psi=NULL;
	}

#ifdef USE_LOGSUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			SG_FREE(arrayS[i]);
		SG_FREE(arrayS);
	} ;
#else //USE_HMMPARALLEL_STRUCTURES
	SG_FREE(arrayS);
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //USE_LOGSUMARRAY

	if (!reused_caches)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		SG_FREE(path_prob_updated);
		SG_FREE(path_prob_dimension);
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
			SG_FREE(path[i]);
#endif //USE_HMMPARALLEL_STRUCTURES
		SG_FREE(path);
	}
}
00443
00444 bool CHMM::train(CFeatures* data)
00445 {
00446 if (data)
00447 {
00448 if (data->get_feature_class() != C_STRING ||
00449 data->get_feature_type() != F_WORD)
00450 {
00451 SG_ERROR("Expected features of class string type word\n");
00452 }
00453 set_observations((CStringFeatures<uint16_t>*) data);
00454 }
00455 return baum_welch_viterbi_train(BW_NORMAL);
00456 }
00457
// Allocates every array whose size depends on N and M: the parameter
// matrices (with random initialization if none exist yet), the per-thread
// scratch rows, the estimate matrices A/B, and re-binds any existing
// observations.  Returns true iff all allocations succeeded.
bool CHMM::alloc_state_dependend_arrays()
{
	// if no parameters exist at all, create and randomly initialize them
	if (!transition_matrix_a && !observation_matrix_b &&
			!initial_state_distribution_p && !end_state_distribution_q)
	{
		transition_matrix_a=SG_MALLOC(float64_t, N*N);
		observation_matrix_b=SG_MALLOC(float64_t, N*M);
		initial_state_distribution_p=SG_MALLOC(float64_t, N);
		end_state_distribution_q=SG_MALLOC(float64_t, N);
		init_model_random();
		convert_to_log();
	}

	// scratch rows of length N (one pair per thread when parallel)
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		arrayN1[i]=SG_MALLOC(float64_t, N);
		arrayN2[i]=SG_MALLOC(float64_t, N);
	}
#else //USE_HMMPARALLEL_STRUCTURES
	arrayN1=SG_MALLOC(float64_t, N);
	arrayN2=SG_MALLOC(float64_t, N);
#endif //USE_HMMPARALLEL_STRUCTURES

	// NOTE(review): guard is LOG_SUMARRAY here but USE_LOGSUMARRAY in the
	// destructor and elsewhere -- confirm which macro is the intended one.
#ifdef LOG_SUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
		arrayS[i]=SG_MALLOC(float64_t, (int32_t)(this->N/2+1));
#else //USE_HMMPARALLEL_STRUCTURES
	arrayS=SG_MALLOC(float64_t, (int32_t)(this->N/2+1));
#endif //USE_HMMPARALLEL_STRUCTURES
#endif //LOG_SUMARRAY

	// accumulators for re-estimated parameters
	transition_matrix_A=SG_MALLOC(float64_t, this->N*this->N);
	observation_matrix_B=SG_MALLOC(float64_t, this->N*this->M);

	// re-attach existing observations so caches match the new arrays
	if (p_observations)
	{
#ifdef USE_HMMPARALLEL_STRUCTURES
		if (alpha_cache[0].table!=NULL)
#else //USE_HMMPARALLEL_STRUCTURES
		if (alpha_cache.table!=NULL)
#endif //USE_HMMPARALLEL_STRUCTURES
			set_observations(p_observations);
		else
			set_observation_nocache(p_observations);
		// set_observations refs again, so drop the extra reference
		SG_UNREF(p_observations);
	}

	this->invalidate_model();

	return ((transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
			(transition_matrix_a != NULL) && (observation_matrix_b != NULL) &&
			(initial_state_distribution_p != NULL) &&
			(end_state_distribution_q != NULL));
}
00514
// Frees all arrays whose size depends on N/M and resets the pointers.
// The parameter matrices are only freed when observation_matrix_b is
// set (i.e. this instance owns them rather than borrowing caller arrays).
void CHMM::free_state_dependend_arrays()
{
#ifdef USE_HMMPARALLEL_STRUCTURES
	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		SG_FREE(arrayN1[i]);
		SG_FREE(arrayN2[i]);

		arrayN1[i]=NULL;
		arrayN2[i]=NULL;
	}
#else
	SG_FREE(arrayN1);
	SG_FREE(arrayN2);
	arrayN1=NULL;
	arrayN2=NULL;
#endif
	if (observation_matrix_b)
	{
		SG_FREE(transition_matrix_A);
		SG_FREE(observation_matrix_B);
		SG_FREE(transition_matrix_a);
		SG_FREE(observation_matrix_b);
		SG_FREE(initial_state_distribution_p);
		SG_FREE(end_state_distribution_q);
	} ;

	transition_matrix_A=NULL;
	observation_matrix_B=NULL;
	transition_matrix_a=NULL;
	observation_matrix_b=NULL;
	initial_state_distribution_p=NULL;
	end_state_distribution_q=NULL;
}
00549
// Central initializer used by the constructors: clears all members,
// allocates the per-thread cache/path structures, optionally loads a
// model file, and allocates the N/M-dependent arrays.  Returns true iff
// loading (when requested) and all allocations succeeded.
// 'm' is stored as the Model constraint, 'pseudo' as the pseudo-count.
bool CHMM::initialize(Model* m, float64_t pseudo, FILE* modelfile)
{
	bool files_ok=true;

	trans_list_forward = NULL ;
	trans_list_forward_cnt = NULL ;
	trans_list_forward_val = NULL ;
	trans_list_backward = NULL ;
	trans_list_backward_cnt = NULL ;
	trans_list_len = 0 ;
	mem_initialized = false ;

	this->transition_matrix_a=NULL;
	this->observation_matrix_b=NULL;
	this->initial_state_distribution_p=NULL;
	this->end_state_distribution_q=NULL;
	this->PSEUDO= pseudo;
	this->model= m;
	this->p_observations=NULL;
	this->reused_caches=false;

	// alpha/beta caches and psi tables: one per thread when parallel
#ifdef USE_HMMPARALLEL_STRUCTURES
	alpha_cache=SG_MALLOC(T_ALPHA_BETA, parallel->get_num_threads());
	beta_cache=SG_MALLOC(T_ALPHA_BETA, parallel->get_num_threads());
	states_per_observation_psi=SG_MALLOC(P_STATES, parallel->get_num_threads());

	for (int32_t i=0; i<parallel->get_num_threads(); i++)
	{
		this->alpha_cache[i].table=NULL;
		this->beta_cache[i].table=NULL;
		this->alpha_cache[i].dimension=0;
		this->beta_cache[i].dimension=0;
		this->states_per_observation_psi[i]=NULL ;
	}

#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.table=NULL;
	this->beta_cache.table=NULL;
	this->alpha_cache.dimension=0;
	this->beta_cache.dimension=0;
	this->states_per_observation_psi=NULL ;
#endif //USE_HMMPARALLEL_STRUCTURES

	// load N/M and the parameters from file if one was given
	if (modelfile)
		files_ok= files_ok && load_model(modelfile);

	// Viterbi path buffers (one per thread when parallel)
#ifdef USE_HMMPARALLEL_STRUCTURES
	path_prob_updated=SG_MALLOC(bool, parallel->get_num_threads());
	path_prob_dimension=SG_MALLOC(int, parallel->get_num_threads());

	path=SG_MALLOC(P_STATES, parallel->get_num_threads());

	for (int32_t i=0; i<parallel->get_num_threads(); i++)
		this->path[i]=NULL;

#else // USE_HMMPARALLEL_STRUCTURES
	this->path=NULL;

#endif //USE_HMMPARALLEL_STRUCTURES

	// pointer tables for the per-thread scratch rows; the rows themselves
	// are allocated in alloc_state_dependend_arrays()
#ifdef USE_HMMPARALLEL_STRUCTURES
	arrayN1=SG_MALLOC(float64_t*, parallel->get_num_threads());
	arrayN2=SG_MALLOC(float64_t*, parallel->get_num_threads());
#endif //USE_HMMPARALLEL_STRUCTURES

	// NOTE(review): guard is LOG_SUMARRAY here but USE_LOGSUMARRAY in the
	// destructor -- confirm which macro is the intended one.
#ifdef LOG_SUMARRAY
#ifdef USE_HMMPARALLEL_STRUCTURES
	arrayS=SG_MALLOC(float64_t*, parallel->get_num_threads());
#endif // USE_HMMPARALLEL_STRUCTURES
#endif //LOG_SUMARRAY

	alloc_state_dependend_arrays();

	this->loglikelihood=false;
	mem_initialized = true ;
	this->invalidate_model();

	return ((files_ok) &&
			(transition_matrix_A != NULL) && (observation_matrix_B != NULL) &&
			(transition_matrix_a != NULL) && (observation_matrix_b != NULL) && (initial_state_distribution_p != NULL) &&
			(end_state_distribution_q != NULL));
}
00633
00634
00635
00636
00637
00638
// Log-domain forward (alpha) recursion using the sparse per-state
// transition lists (trans_list_forward).  Returns the log forward
// variable alpha_time(state) for the given observation sequence
// ('dimension' indexes the sequence); when 'time' is past the end of the
// sequence, returns the total log-likelihood (alpha summed with the
// end-state distribution q).  With an alpha cache attached, the whole
// table is computed once and answers are served from the cache.
float64_t CHMM::forward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cached mode: rows live consecutively in the cache table and we
		// force a full-sequence computation
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// uncached mode: ping-pong between two scratch rows of length N
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		// base case: alpha_0(state) = p(state) + b(state, o_0)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialize alpha_0 for all states
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction: alpha_t(j) = logsum_i(alpha_{t-1}(i) + a(i,j)) + b(j, o_t)
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{
			for (int32_t j=0; j<N; j++)
			{
				// only states with a transition into j contribute
				register int32_t i, num = trans_list_forward_cnt[j] ;
				float64_t sum=-CMath::INFTY;
				for (i=0; i < num; i++)
				{
					int32_t ii = trans_list_forward[j][i] ;
					sum = CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii,j));
				} ;

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap the two scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next row of the cache table
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		if (time<p_observations->get_vector_length(dimension))
		{
			// in-sequence query: one more induction step for 'state' only
			register int32_t i, num=trans_list_forward_cnt[state];
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<num; i++)
			{
				int32_t ii = trans_list_forward[state][i] ;
				sum= CMath::logarithmic_sum(sum, alpha[ii] + get_a(ii, state));
			} ;

			return sum + get_b(state, p_observations->get_feature(dimension,time));
		}
		else
		{
			// termination: fold in the end-state distribution q
			register int32_t i ;
			float64_t sum ;
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid, then answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00738
00739
00740
00741
00742
// Dense variant of the forward (alpha) recursion: iterates over all N
// predecessor states instead of the sparse transition lists.  Same
// caching behavior and return semantics as forward_comp().  The
// USE_LOGSUMARRAY branches compute the same log-sums via pairwise
// reduction into ARRAYS(dimension).
float64_t CHMM::forward_comp_old(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* alpha_new;
	T_ALPHA_BETA_TABLE* alpha;
	T_ALPHA_BETA_TABLE* dummy;
	if (time<1)
		time=0;

	int32_t wanted_time=time;

	if (ALPHA_CACHE(dimension).table)
	{
		// cached mode: compute the whole table, answer from the cache
		alpha=&ALPHA_CACHE(dimension).table[0];
		alpha_new=&ALPHA_CACHE(dimension).table[N];
		time=p_observations->get_vector_length(dimension)+1;
	}
	else
	{
		// uncached mode: ping-pong between two scratch rows
		alpha_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		alpha=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time<1)
		// base case: alpha_0(state) = p(state) + b(state, o_0)
		return get_p(state) + get_b(state, p_observations->get_feature(dimension,0));
	else
	{
		// initialize alpha_0 for all states
		for (int32_t i=0; i<N; i++)
			alpha[i] = get_p(i) + get_b(i, p_observations->get_feature(dimension,0)) ;

		// induction over time
		for (register int32_t t=1; t<time && t < p_observations->get_vector_length(dimension); t++)
		{
			for (int32_t j=0; j<N; j++)
			{
				register int32_t i ;
#ifdef USE_LOGSUMARRAY
				// pairwise log-sum reduction over all predecessors
				for (i=0; i<(N>>1); i++)
					ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,j),
							alpha[(i<<1)+1] + get_a((i<<1)+1,j));
				if (N%2==1)
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+
						CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,j),
								CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					alpha_new[j]=get_b(j, p_observations->get_feature(dimension,t))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				// sequential log-sum over all predecessors
				float64_t sum=-CMath::INFTY;
				for (i=0; i<N; i++)
					sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i,j));

				alpha_new[j]= sum + get_b(j, p_observations->get_feature(dimension,t));
#endif //USE_LOGSUMARRAY
			}

			if (!ALPHA_CACHE(dimension).table)
			{
				// swap the two scratch rows
				dummy=alpha;
				alpha=alpha_new;
				alpha_new=dummy;
			}
			else
			{
				// advance to the next row of the cache table
				alpha=alpha_new;
				alpha_new+=N;
			}
		}

		if (time<p_observations->get_vector_length(dimension))
		{
			// in-sequence query: one final induction step for 'state'
			register int32_t i;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_a(i<<1,state),
						alpha[(i<<1)+1] + get_a((i<<1)+1,state));
			if (N%2==1)
				return get_b(state, p_observations->get_feature(dimension,time))+
					CMath::logarithmic_sum(alpha[N-1]+get_a(N-1,state),
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return get_b(state, p_observations->get_feature(dimension,time))+CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			register float64_t sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum= CMath::logarithmic_sum(sum, alpha[i] + get_a(i, state));

			return sum + get_b(state, p_observations->get_feature(dimension,time));
#endif //USE_LOGSUMARRAY
		}
		else
		{
			// termination: fold in the end-state distribution q
			register int32_t i ;
			float64_t sum ;
#ifdef USE_LOGSUMARRAY
			for (i=0; i<(N>>1); i++)
				ARRAYS(dimension)[i]=CMath::logarithmic_sum(alpha[i<<1] + get_q(i<<1),
						alpha[(i<<1)+1] + get_q((i<<1)+1));
			if (N%2==1)
				sum=CMath::logarithmic_sum(alpha[N-1]+get_q(N-1),
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			sum=-CMath::INFTY;
			for (i=0; i<N; i++)
				sum=CMath::logarithmic_sum(sum, alpha[i] + get_q(i));
#endif //USE_LOGSUMARRAY

			if (!ALPHA_CACHE(dimension).table)
				return sum;
			else
			{
				// mark the cache valid, then answer from it
				ALPHA_CACHE(dimension).dimension=dimension;
				ALPHA_CACHE(dimension).updated=true;
				ALPHA_CACHE(dimension).sum=sum;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return ALPHA_CACHE(dimension).table[wanted_time*N+state];
				else
					return ALPHA_CACHE(dimension).sum;
			}
		}
	}
}
00870
00871
00872
00873
00874
// Log-domain backward (beta) recursion using the sparse per-state
// transition lists (trans_list_backward).  Returns the log backward
// variable beta_time(state); for time<0 the recursion runs to the start
// of the sequence and the total log-likelihood (folded with p and b at
// t=0) is cached/returned.  With a beta cache attached the table is
// filled back-to-front.
float64_t CHMM::backward_comp(int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		// NOTE(review): forward() is invoked here for its side effects
		// (presumably priming caches) and its result discarded -- confirm.
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cached mode: start at the last row and walk backwards
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// uncached mode: ping-pong between two scratch rows
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)
		// base case: beta_{T-1}(state) = q(state)
		return get_q(state);
	else
	{
		// initialize beta_{T-1} for all states
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction backwards in time:
		// beta_t(i) = logsum_j(a(i,j) + b(j, o_{t+1}) + beta_{t+1}(j))
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				// only states reachable from i contribute
				register int32_t j, num=trans_list_backward_cnt[i] ;
				float64_t sum=-CMath::INFTY;
				for (j=0; j<num; j++)
				{
					int32_t jj = trans_list_backward[i][j] ;
					sum= CMath::logarithmic_sum(sum, get_a(i, jj) + get_b(jj, p_observations->get_feature(dimension,t)) + beta[jj]);
				} ;
				beta_new[i]=sum;
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap the two scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// step one row backwards through the cache table
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// in-sequence query: one final step for 'state' only
			register int32_t j, num=trans_list_backward_cnt[state] ;
			float64_t sum=-CMath::INFTY;
			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[state][j] ;
				sum= CMath::logarithmic_sum(sum, get_a(state, jj) + get_b(jj, p_observations->get_feature(dimension,time+1))+beta[jj]);
			} ;
			return sum;
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// termination: fold in p and the first emission, cache it
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				// termination without a cache
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
00972
00973
// Dense variant of the backward (beta) recursion: iterates over all N
// successor states instead of the sparse transition lists.  Same caching
// behavior and return semantics as backward_comp().  The USE_LOGSUMARRAY
// branches compute the same log-sums via pairwise reduction into
// ARRAYS(dimension).
float64_t CHMM::backward_comp_old(
	int32_t time, int32_t state, int32_t dimension)
{
	T_ALPHA_BETA_TABLE* beta_new;
	T_ALPHA_BETA_TABLE* beta;
	T_ALPHA_BETA_TABLE* dummy;
	int32_t wanted_time=time;

	if (time<0)
		// NOTE(review): forward() is invoked here for its side effects
		// (presumably priming caches) and its result discarded -- confirm.
		forward(time, state, dimension);

	if (BETA_CACHE(dimension).table)
	{
		// cached mode: start at the last row and walk backwards
		beta=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-1)];
		beta_new=&BETA_CACHE(dimension).table[N*(p_observations->get_vector_length(dimension)-2)];
		time=-1;
	}
	else
	{
		// uncached mode: ping-pong between two scratch rows
		beta_new=(T_ALPHA_BETA_TABLE*)ARRAYN1(dimension);
		beta=(T_ALPHA_BETA_TABLE*)ARRAYN2(dimension);
	}

	if (time>=p_observations->get_vector_length(dimension)-1)
		// base case: beta_{T-1}(state) = q(state)
		return get_q(state);
	else
	{
		// initialize beta_{T-1} for all states
		for (register int32_t i=0; i<N; i++)
			beta[i]=get_q(i);

		// induction backwards in time
		for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>time+1 && t>0; t--)
		{
			for (register int32_t i=0; i<N; i++)
			{
				register int32_t j ;
#ifdef USE_LOGSUMARRAY
				// pairwise log-sum reduction over all successors
				for (j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(
							get_a(i, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,t)) + beta[j<<1],
							get_a(i, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,t)) + beta[(j<<1)+1]);
				if (N%2==1)
					beta_new[i]=CMath::logarithmic_sum(get_a(i, N-1) + get_b(N-1, p_observations->get_feature(dimension,t)) + beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					beta_new[i]=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				// sequential log-sum over all successors
				float64_t sum=-CMath::INFTY;
				for (j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_a(i, j) + get_b(j, p_observations->get_feature(dimension,t)) + beta[j]);

				beta_new[i]=sum;
#endif //USE_LOGSUMARRAY
			}

			if (!BETA_CACHE(dimension).table)
			{
				// swap the two scratch rows
				dummy=beta;
				beta=beta_new;
				beta_new=dummy;
			}
			else
			{
				// step one row backwards through the cache table
				beta=beta_new;
				beta_new-=N;
			}
		}

		if (time>=0)
		{
			// in-sequence query: one final step for 'state' only
			register int32_t j ;
#ifdef USE_LOGSUMARRAY
			for (j=0; j<(N>>1); j++)
				ARRAYS(dimension)[j]=CMath::logarithmic_sum(
						get_a(state, j<<1) + get_b(j<<1, p_observations->get_feature(dimension,time+1)) + beta[j<<1],
						get_a(state, (j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,time+1)) + beta[(j<<1)+1]);
			if (N%2==1)
				return CMath::logarithmic_sum(get_a(state, N-1) + get_b(N-1, p_observations->get_feature(dimension,time+1)) + beta[N-1],
						CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
			else
				return CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
			float64_t sum=-CMath::INFTY;
			for (j=0; j<N; j++)
				sum= CMath::logarithmic_sum(sum, get_a(state, j) + get_b(j, p_observations->get_feature(dimension,time+1))+beta[j]);

			return sum;
#endif //USE_LOGSUMARRAY
		}
		else
		{
			if (BETA_CACHE(dimension).table)
			{
				// termination: fold in p and the first emission, cache it
#ifdef USE_LOGSUMARRAY//AAA
				for (int32_t j=0; j<(N>>1); j++)
					ARRAYS(dimension)[j]=CMath::logarithmic_sum(get_p(j<<1) + get_b(j<<1, p_observations->get_feature(dimension,0))+beta[j<<1],
							get_p((j<<1)+1) + get_b((j<<1)+1, p_observations->get_feature(dimension,0))+beta[(j<<1)+1]) ;
				if (N%2==1)
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum(get_p(N-1) + get_b(N-1, p_observations->get_feature(dimension,0))+beta[N-1],
							CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1)) ;
				else
					BETA_CACHE(dimension).sum=CMath::logarithmic_sum_array(ARRAYS(dimension), N>>1) ;
#else //USE_LOGSUMARRAY
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				BETA_CACHE(dimension).sum=sum;
#endif //USE_LOGSUMARRAY
				BETA_CACHE(dimension).dimension=dimension;
				BETA_CACHE(dimension).updated=true;

				if (wanted_time<p_observations->get_vector_length(dimension))
					return BETA_CACHE(dimension).table[wanted_time*N+state];
				else
					return BETA_CACHE(dimension).sum;
			}
			else
			{
				// termination without a cache
				float64_t sum=-CMath::INFTY;
				for (register int32_t j=0; j<N; j++)
					sum= CMath::logarithmic_sum(sum, get_p(j) + get_b(j, p_observations->get_feature(dimension,0))+beta[j]);
				return sum;
			}
		}
	}
}
01103
01104
01105
//! Computes the Viterbi (most probable state) path for observation
//! sequence `dimension` and returns its log probability.
//! dimension==-1 averages best_path over all sequences (cached in
//! all_pat_prob). The decoded path itself is stored in PATH(dimension)
//! and the backtracking pointers in the psi table.
//! Returns -1 if no observations are set, the psi table is missing, or
//! `dimension` is out of range.
float64_t CHMM::best_path(int32_t dimension)
{
	if (!p_observations)
		return -1;

	if (dimension==-1)
	{
		// average the per-sequence Viterbi scores over all sequences
		if (!all_path_prob_updated)
		{
			SG_INFO( "computing full viterbi likelihood\n") ;
			float64_t sum = 0 ;
			for (int32_t i=0; i<p_observations->get_num_vectors(); i++)
				sum+=best_path(i) ;
			sum /= p_observations->get_num_vectors() ;
			all_pat_prob=sum ;
			all_path_prob_updated=true ;
			return sum ;
		} else
			return all_pat_prob ;
	} ;

	if (!STATES_PER_OBSERVATION_PSI(dimension))
		return -1 ;

	if (dimension >= p_observations->get_num_vectors())
		return -1;

	// reuse the cached result if this dimension was decoded already
	if (PATH_PROB_UPDATED(dimension) && dimension==PATH_PROB_DIMENSION(dimension))
		return pat_prob;
	else
	{
		// two score buffers that are swapped each time step
		register float64_t* delta= ARRAYN2(dimension);
		register float64_t* delta_new= ARRAYN1(dimension);

		// initialization: delta_0(i) = log p(i) + log b_i(O_0)
		{
			for (register int32_t i=0; i<N; i++)
			{
				delta[i]=get_p(i)+get_b(i, p_observations->get_feature(dimension,0));
				set_psi(0, i, 0, dimension);
			}
		}

#ifdef USE_PATHDEBUG
		float64_t worst=-CMath::INFTY/4 ;
#endif
		// recursion: delta_t(j) = max_i(delta_{t-1}(i)+log a_ij) + log b_j(O_t)
		for (register int32_t t=1; t<p_observations->get_vector_length(dimension); t++)
		{
			register float64_t* dummy;
			register int32_t NN=N ;
			for (register int32_t j=0; j<NN; j++)
			{
				register float64_t * matrix_a=&transition_matrix_a[j*N] ;
				register float64_t maxj=delta[0] + matrix_a[0];
				register int32_t argmax=0;

				for (register int32_t i=1; i<NN; i++)
				{
					register float64_t temp = delta[i] + matrix_a[i];

					if (temp>maxj)
					{
						maxj=temp;
						argmax=i;
					}
				}
#ifdef FIX_POS
				if ((!model) || (model->get_fix_pos_state(t,j,NN)!=Model::FIX_DISALLOWED))
#endif
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t));
#ifdef FIX_POS
				else
					delta_new[j]=maxj + get_b(j,p_observations->get_feature(dimension,t)) + Model::DISALLOWED_PENALTY;
#endif
				// remember the best predecessor for backtracking
				set_psi(t, j, argmax, dimension);
			}

#ifdef USE_PATHDEBUG
			float64_t best=log(0) ;
			for (int32_t jj=0; jj<N; jj++)
				if (delta_new[jj]>best)
					best=delta_new[jj] ;

			if (best<-CMath::INFTY/2)
			{
				SG_DEBUG( "worst case at %i: %e:%e\n", t, best, worst) ;
				worst=best ;
			} ;
#endif
			// swap score buffers for the next time step
			dummy=delta;
			delta=delta_new;
			delta_new=dummy;
		}

		// termination: add the end-state scores and pick the best final state
		{
			register float64_t maxj=delta[0]+get_q(0);
			register int32_t argmax=0;

			for (register int32_t i=1; i<N; i++)
			{
				register float64_t temp=delta[i]+get_q(i);

				if (temp>maxj)
				{
					maxj=temp;
					argmax=i;
				}
			}
			pat_prob=maxj;
			PATH(dimension)[p_observations->get_vector_length(dimension)-1]=argmax;
		} ;

		// backtracking through the psi table reconstructs the state path
		{
			for (register int32_t t=p_observations->get_vector_length(dimension)-1; t>0; t--)
			{
				PATH(dimension)[t-1]=get_psi(t, PATH(dimension)[t], dimension);
			}
		}
		PATH_PROB_UPDATED(dimension)=true;
		PATH_PROB_DIMENSION(dimension)=dimension;
		return pat_prob ;
	}
}
01232
01233 #ifndef USE_HMMPARALLEL
01234 float64_t CHMM::model_probability_comp()
01235 {
01236
01237 mod_prob=0 ;
01238 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01239 mod_prob+=forward(p_observations->get_vector_length(dim), 0, dim);
01240
01241 mod_prob_updated=true;
01242 return mod_prob;
01243 }
01244
01245 #else
01246
01247 float64_t CHMM::model_probability_comp()
01248 {
01249 pthread_t *threads=SG_MALLOC(pthread_t, parallel->get_num_threads());
01250 S_BW_THREAD_PARAM *params=SG_MALLOC(S_BW_THREAD_PARAM, parallel->get_num_threads());
01251
01252 SG_INFO( "computing full model probablity\n");
01253 mod_prob=0;
01254
01255 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01256 {
01257 params[cpu].hmm=this ;
01258 params[cpu].dim_start= p_observations->get_num_vectors()*cpu/parallel->get_num_threads();
01259 params[cpu].dim_stop= p_observations->get_num_vectors()*(cpu+1)/parallel->get_num_threads();
01260 params[cpu].p_buf=SG_MALLOC(float64_t, N);
01261 params[cpu].q_buf=SG_MALLOC(float64_t, N);
01262 params[cpu].a_buf=SG_MALLOC(float64_t, N*N);
01263 params[cpu].b_buf=SG_MALLOC(float64_t, N*M);
01264 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, (void*)¶ms[cpu]);
01265 }
01266
01267 for (int32_t cpu=0; cpu<parallel->get_num_threads(); cpu++)
01268 {
01269 pthread_join(threads[cpu], NULL);
01270 mod_prob+=params[cpu].ret;
01271 }
01272
01273 for (int32_t i=0; i<parallel->get_num_threads(); i++)
01274 {
01275 SG_FREE(params[i].p_buf);
01276 SG_FREE(params[i].q_buf);
01277 SG_FREE(params[i].a_buf);
01278 SG_FREE(params[i].b_buf);
01279 }
01280
01281 SG_FREE(threads);
01282 SG_FREE(params);
01283
01284 mod_prob_updated=true;
01285 return mod_prob;
01286 }
01287
01288 void* CHMM::bw_dim_prefetch(void* params)
01289 {
01290 CHMM* hmm=((S_BW_THREAD_PARAM*) params)->hmm;
01291 int32_t start=((S_BW_THREAD_PARAM*) params)->dim_start;
01292 int32_t stop=((S_BW_THREAD_PARAM*) params)->dim_stop;
01293 float64_t* p_buf=((S_BW_THREAD_PARAM*) params)->p_buf;
01294 float64_t* q_buf=((S_BW_THREAD_PARAM*) params)->q_buf;
01295 float64_t* a_buf=((S_BW_THREAD_PARAM*) params)->a_buf;
01296 float64_t* b_buf=((S_BW_THREAD_PARAM*) params)->b_buf;
01297 ((S_BW_THREAD_PARAM*)params)->ret=0;
01298
01299 for (int32_t dim=start; dim<stop; dim++)
01300 {
01301 hmm->forward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01302 hmm->backward_comp(hmm->p_observations->get_vector_length(dim), hmm->N-1, dim) ;
01303 float64_t modprob=hmm->model_probability(dim) ;
01304 hmm->ab_buf_comp(p_buf, q_buf, a_buf, b_buf, dim) ;
01305 ((S_BW_THREAD_PARAM*)params)->ret+= modprob;
01306 }
01307 return NULL ;
01308 }
01309
01310 void* CHMM::bw_single_dim_prefetch(void * params)
01311 {
01312 CHMM* hmm=((S_BW_THREAD_PARAM*)params)->hmm ;
01313 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01314 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->model_probability(dim);
01315 return NULL ;
01316 }
01317
01318 void* CHMM::vit_dim_prefetch(void * params)
01319 {
01320 CHMM* hmm=((S_DIM_THREAD_PARAM*)params)->hmm ;
01321 int32_t dim=((S_DIM_THREAD_PARAM*)params)->dim ;
01322 ((S_DIM_THREAD_PARAM*)params)->prob_sum = hmm->best_path(dim);
01323 return NULL ;
01324 }
01325
01326 #endif //USE_HMMPARALLEL
01327
01328
01329 #ifdef USE_HMMPARALLEL
01330
//! Computes the Baum-Welch statistics (log-domain numerators of the
//! re-estimation formulas) for a single observation sequence `dim` and
//! writes them into the caller-provided buffers. Used by the parallel
//! trainer so each worker thread accumulates into private buffers.
//! Assumes the forward/backward caches for `dim` are available.
//! @param p_buf size-N buffer for start-state statistics
//! @param q_buf size-N buffer for end-state statistics
//! @param a_buf size-N*N buffer for transition statistics (row-major)
//! @param b_buf size-N*M buffer for emission statistics (row-major)
//! @param dim index of the observation sequence
void CHMM::ab_buf_comp(
	float64_t* p_buf, float64_t* q_buf, float64_t *a_buf, float64_t* b_buf,
	int32_t dim)
{
	int32_t i,j,t ;
	float64_t a_sum;
	float64_t b_sum;

	// log P(O_dim | model); subtracted everywhere below to normalize
	float64_t dimmodprob=model_probability(dim);

	for (i=0; i<N; i++)
	{
		// expected (log-domain) count of starting / ending in state i
		p_buf[i]=get_p(i)+get_b(i,p_observations->get_feature(dim,0))+backward(0,i,dim) - dimmodprob;
		q_buf[i]=forward(p_observations->get_vector_length(dim)-1, i, dim)+get_q(i) - dimmodprob;

		// transition statistics: log sum over t of
		// P(state_t=i, state_{t+1}=j, O | model)
		for (j=0; j<N; j++)
		{
			a_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
			{
				a_sum= CMath::logarithmic_sum(a_sum, forward(t,i,dim)+
						get_a(i,j)+get_b(j,p_observations->get_feature(dim,t+1))+backward(t+1,j,dim));
			}
			a_buf[N*i+j]=a_sum-dimmodprob ;
		}

		// emission statistics: log sum over the times where symbol j
		// was actually observed
		for (j=0; j<M; j++)
		{
			b_sum=-CMath::INFTY;

			for (t=0; t<p_observations->get_vector_length(dim); t++)
			{
				if (p_observations->get_feature(dim,t)==j)
					b_sum=CMath::logarithmic_sum(b_sum, forward(t,i,dim)+backward(t, i, dim));
			}

			b_buf[M*i+j]=b_sum-dimmodprob ;
		}
	}
}
01375
01376
01377 void CHMM::estimate_model_baum_welch(CHMM* hmm)
01378 {
01379 int32_t i,j,cpu;
01380 float64_t fullmodprob=0;
01381
01382
01383 for (i=0; i<N; i++)
01384 {
01385 if (hmm->get_p(i)>CMath::ALMOST_NEG_INFTY)
01386 set_p(i,log(PSEUDO));
01387 else
01388 set_p(i,hmm->get_p(i));
01389 if (hmm->get_q(i)>CMath::ALMOST_NEG_INFTY)
01390 set_q(i,log(PSEUDO));
01391 else
01392 set_q(i,hmm->get_q(i));
01393
01394 for (j=0; j<N; j++)
01395 if (hmm->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
01396 set_a(i,j, log(PSEUDO));
01397 else
01398 set_a(i,j,hmm->get_a(i,j));
01399 for (j=0; j<M; j++)
01400 if (hmm->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
01401 set_b(i,j, log(PSEUDO));
01402 else
01403 set_b(i,j,hmm->get_b(i,j));
01404 }
01405 invalidate_model();
01406
01407 int32_t num_threads = parallel->get_num_threads();
01408
01409 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
01410 S_BW_THREAD_PARAM *params=SG_MALLOC(S_BW_THREAD_PARAM, num_threads);
01411
01412 if (p_observations->get_num_vectors()<num_threads)
01413 num_threads=p_observations->get_num_vectors();
01414
01415 for (cpu=0; cpu<num_threads; cpu++)
01416 {
01417 params[cpu].p_buf=SG_MALLOC(float64_t, N);
01418 params[cpu].q_buf=SG_MALLOC(float64_t, N);
01419 params[cpu].a_buf=SG_MALLOC(float64_t, N*N);
01420 params[cpu].b_buf=SG_MALLOC(float64_t, N*M);
01421
01422 params[cpu].hmm=hmm;
01423 int32_t start = p_observations->get_num_vectors()*cpu / num_threads;
01424 int32_t stop=p_observations->get_num_vectors()*(cpu+1) / num_threads;
01425
01426 if (cpu == parallel->get_num_threads()-1)
01427 stop=p_observations->get_num_vectors();
01428
01429 ASSERT(start<stop);
01430 params[cpu].dim_start=start;
01431 params[cpu].dim_stop=stop;
01432
01433 pthread_create(&threads[cpu], NULL, bw_dim_prefetch, ¶ms[cpu]);
01434 }
01435
01436 for (cpu=0; cpu<num_threads; cpu++)
01437 {
01438 pthread_join(threads[cpu], NULL);
01439
01440 for (i=0; i<N; i++)
01441 {
01442
01443 set_p(i, CMath::logarithmic_sum(get_p(i), params[cpu].p_buf[i]));
01444 set_q(i, CMath::logarithmic_sum(get_q(i), params[cpu].q_buf[i]));
01445
01446
01447 for (j=0; j<N; j++)
01448 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), params[cpu].a_buf[N*i+j]));
01449
01450
01451 for (j=0; j<M; j++)
01452 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), params[cpu].b_buf[M*i+j]));
01453 }
01454
01455 fullmodprob+=params[cpu].ret;
01456
01457 }
01458
01459 for (cpu=0; cpu<num_threads; cpu++)
01460 {
01461 SG_FREE(params[cpu].p_buf);
01462 SG_FREE(params[cpu].q_buf);
01463 SG_FREE(params[cpu].a_buf);
01464 SG_FREE(params[cpu].b_buf);
01465 }
01466
01467 SG_FREE(threads);
01468 SG_FREE(params);
01469
01470
01471 hmm->mod_prob=fullmodprob;
01472 hmm->mod_prob_updated=true ;
01473
01474
01475 normalize();
01476 invalidate_model();
01477 }
01478
01479 #else // USE_HMMPARALLEL
01480
01481
//! One Baum-Welch re-estimation step (serial version): accumulates the
//! expected start/end/transition/emission counts of `estimate` over all
//! observation sequences (in log space) into this model's parameters
//! and normalizes them. Transitions are only accumulated for the pairs
//! listed in trans_list_backward (sparse transition structure).
//! @param estimate the model the statistics are computed from
void CHMM::estimate_model_baum_welch(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	// log-domain numerators
	float64_t dimmodprob=0;	// log P(O_dim | estimate)
	float64_t fullmodprob=0;	// summed over all sequences

	// initialize the numerators with a pseudo count; parameters that are
	// (almost) -infinity in `estimate` stay fixed at their value
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate statistics over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// expected counts of starting / ending in state i
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only iterate over transitions that actually exist
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}

			// expected emission counts for each symbol j in state i
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the total likelihood of the data under the OLD model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated numerators into proper log probabilities
	normalize();
	invalidate_model();
}
01566
01567
//! Dense (old) variant of the Baum-Welch re-estimation step: identical
//! to estimate_model_baum_welch() but iterates over ALL N*N transition
//! pairs instead of using the sparse trans_list_backward lists.
//! @param estimate the model the statistics are computed from
void CHMM::estimate_model_baum_welch_old(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum, b_sum;	// log-domain numerators
	float64_t dimmodprob=0;	// log P(O_dim | estimate)
	float64_t fullmodprob=0;	// summed over all sequences

	// initialize the numerators with a pseudo count; parameters that are
	// (almost) -infinity in `estimate` stay fixed at their value
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			if (estimate->get_b(i,j)>CMath::ALMOST_NEG_INFTY)
				set_b(i,j, log(PSEUDO));
			else
				set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate statistics over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// expected counts of starting / ending in state i
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// expected transition counts for EVERY pair (i,j)
			for (j=0; j<N; j++)
			{
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
				}
				set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum-dimmodprob));
			}

			// expected emission counts for each symbol j in state i
			for (j=0; j<M; j++)
			{
				b_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim); t++)
				{
					if (p_observations->get_feature(dim,t)==j)
						b_sum=CMath::logarithmic_sum(b_sum, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
				}

				set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum-dimmodprob));
			}
		}
	}

	// cache the total likelihood of the data under the OLD model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated numerators into proper log probabilities
	normalize();
	invalidate_model();
}
01649 #endif // USE_HMMPARALLEL
01650
01651
01652
//! Baum-Welch re-estimation step for the TRANSITION parameters only:
//! start/end/transition statistics are accumulated as in
//! estimate_model_baum_welch(), but the emission probabilities b are
//! copied from `estimate` unchanged (not re-estimated).
//! @param estimate the model the statistics are computed from
void CHMM::estimate_model_baum_welch_trans(CHMM* estimate)
{
	int32_t i,j,t,dim;
	float64_t a_sum;	// log-domain numerator
	float64_t dimmodprob=0;	// log P(O_dim | estimate)
	float64_t fullmodprob=0;	// summed over all sequences

	// initialize the numerators with a pseudo count; parameters that are
	// (almost) -infinity in `estimate` stay fixed at their value.
	// Emissions are copied verbatim since they are not re-estimated here.
	for (i=0; i<N; i++)
	{
		if (estimate->get_p(i)>CMath::ALMOST_NEG_INFTY)
			set_p(i,log(PSEUDO));
		else
			set_p(i,estimate->get_p(i));
		if (estimate->get_q(i)>CMath::ALMOST_NEG_INFTY)
			set_q(i,log(PSEUDO));
		else
			set_q(i,estimate->get_q(i));

		for (j=0; j<N; j++)
			if (estimate->get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				set_a(i,j, log(PSEUDO));
			else
				set_a(i,j,estimate->get_a(i,j));
		for (j=0; j<M; j++)
			set_b(i,j,estimate->get_b(i,j));
	}
	invalidate_model();

	// accumulate statistics over every observation sequence
	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		dimmodprob=estimate->model_probability(dim);
		fullmodprob+=dimmodprob ;

		for (i=0; i<N; i++)
		{
			// expected counts of starting / ending in state i
			set_p(i, CMath::logarithmic_sum(get_p(i), estimate->get_p(i)+estimate->get_b(i,p_observations->get_feature(dim,0))+estimate->backward(0,i,dim) - dimmodprob));
			set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+estimate->get_q(i) - dimmodprob ));

			// only iterate over transitions that actually exist
			int32_t num = trans_list_backward_cnt[i] ;

			for (j=0; j<num; j++)
			{
				int32_t jj = trans_list_backward[i][j] ;
				a_sum=-CMath::INFTY;

				for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
				{
					a_sum= CMath::logarithmic_sum(a_sum, estimate->forward(t,i,dim)+
							estimate->get_a(i,jj)+estimate->get_b(jj,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,jj,dim));
				}
				set_a(i,jj, CMath::logarithmic_sum(get_a(i,jj), a_sum-dimmodprob));
			}
		}
	}

	// cache the total likelihood of the data under the OLD model
	estimate->mod_prob=fullmodprob;
	estimate->mod_prob_updated=true ;

	// turn the accumulated numerators into proper log probabilities
	normalize();
	invalidate_model();
}
01719
01720
01721
01722
01723 void CHMM::estimate_model_baum_welch_defined(CHMM* estimate)
01724 {
01725 int32_t i,j,old_i,k,t,dim;
01726 float64_t a_sum_num, b_sum_num;
01727 float64_t a_sum_denom, b_sum_denom;
01728 float64_t dimmodprob=-CMath::INFTY;
01729 float64_t fullmodprob=0;
01730 float64_t* A=ARRAYN1(0);
01731 float64_t* B=ARRAYN2(0);
01732
01733
01734
01735 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01736 set_p(i,log(PSEUDO));
01737
01738 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01739 set_q(i,log(PSEUDO));
01740
01741 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01742 {
01743 j=model->get_learn_a(k,1);
01744 set_a(i,j, log(PSEUDO));
01745 }
01746
01747 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01748 {
01749 j=model->get_learn_b(k,1);
01750 set_b(i,j, log(PSEUDO));
01751 }
01752
01753 for (i=0; i<N; i++)
01754 {
01755 A[i]=log(PSEUDO);
01756 B[i]=log(PSEUDO);
01757 }
01758
01759 #ifdef USE_HMMPARALLEL
01760 int32_t num_threads = parallel->get_num_threads();
01761 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
01762 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
01763
01764 if (p_observations->get_num_vectors()<num_threads)
01765 num_threads=p_observations->get_num_vectors();
01766 #endif
01767
01768
01769 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
01770 {
01771 #ifdef USE_HMMPARALLEL
01772 if (dim%num_threads==0)
01773 {
01774 for (i=0; i<num_threads; i++)
01775 {
01776 if (dim+i<p_observations->get_num_vectors())
01777 {
01778 params[i].hmm=estimate ;
01779 params[i].dim=dim+i ;
01780 pthread_create(&threads[i], NULL, bw_single_dim_prefetch, (void*)¶ms[i]) ;
01781 }
01782 }
01783 for (i=0; i<num_threads; i++)
01784 {
01785 if (dim+i<p_observations->get_num_vectors())
01786 {
01787 pthread_join(threads[i], NULL);
01788 dimmodprob = params[i].prob_sum;
01789 }
01790 }
01791 }
01792 #else
01793 dimmodprob=estimate->model_probability(dim);
01794 #endif // USE_HMMPARALLEL
01795
01796
01797 fullmodprob+= dimmodprob;
01798
01799
01800 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01801 set_p(i, CMath::logarithmic_sum(get_p(i), estimate->forward(0,i,dim)+estimate->backward(0,i,dim) - dimmodprob ) );
01802
01803 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01804 set_q(i, CMath::logarithmic_sum(get_q(i), estimate->forward(p_observations->get_vector_length(dim)-1, i, dim)+
01805 estimate->backward(p_observations->get_vector_length(dim)-1, i, dim) - dimmodprob ) );
01806
01807
01808 old_i=-1;
01809 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01810 {
01811
01812
01813 if (old_i!=i)
01814 {
01815 old_i=i;
01816 a_sum_denom=-CMath::INFTY;
01817
01818 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01819 a_sum_denom= CMath::logarithmic_sum(a_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01820
01821 A[i]= CMath::logarithmic_sum(A[i], a_sum_denom-dimmodprob);
01822 }
01823
01824 j=model->get_learn_a(k,1);
01825 a_sum_num=-CMath::INFTY;
01826 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01827 {
01828 a_sum_num= CMath::logarithmic_sum(a_sum_num, estimate->forward(t,i,dim)+
01829 estimate->get_a(i,j)+estimate->get_b(j,p_observations->get_feature(dim,t+1))+estimate->backward(t+1,j,dim));
01830 }
01831
01832 set_a(i,j, CMath::logarithmic_sum(get_a(i,j), a_sum_num-dimmodprob));
01833 }
01834
01835
01836 old_i=-1;
01837 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01838 {
01839
01840
01841
01842 if (old_i!=i)
01843 {
01844 old_i=i;
01845 b_sum_denom=-CMath::INFTY;
01846
01847 for (t=0; t<p_observations->get_vector_length(dim); t++)
01848 b_sum_denom= CMath::logarithmic_sum(b_sum_denom, estimate->forward(t,i,dim)+estimate->backward(t,i,dim));
01849
01850 B[i]= CMath::logarithmic_sum(B[i], b_sum_denom-dimmodprob);
01851 }
01852
01853 j=model->get_learn_b(k,1);
01854 b_sum_num=-CMath::INFTY;
01855 for (t=0; t<p_observations->get_vector_length(dim); t++)
01856 {
01857 if (p_observations->get_feature(dim,t)==j)
01858 b_sum_num=CMath::logarithmic_sum(b_sum_num, estimate->forward(t,i,dim)+estimate->backward(t, i, dim));
01859 }
01860
01861 set_b(i,j, CMath::logarithmic_sum(get_b(i,j), b_sum_num-dimmodprob));
01862 }
01863 }
01864 #ifdef USE_HMMPARALLEL
01865 SG_FREE(threads);
01866 SG_FREE(params);
01867 #endif
01868
01869
01870
01871 for (k=0; (i=model->get_learn_p(k))!=-1; k++)
01872 set_p(i, get_p(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01873
01874 for (k=0; (i=model->get_learn_q(k))!=-1; k++)
01875 set_q(i, get_q(i)-log(p_observations->get_num_vectors()+N*PSEUDO) );
01876
01877 for (k=0; (i=model->get_learn_a(k,0))!=-1; k++)
01878 {
01879 j=model->get_learn_a(k,1);
01880 set_a(i,j, get_a(i,j) - A[i]);
01881 }
01882
01883 for (k=0; (i=model->get_learn_b(k,0))!=-1; k++)
01884 {
01885 j=model->get_learn_b(k,1);
01886 set_b(i,j, get_b(i,j) - B[i]);
01887 }
01888
01889
01890 estimate->mod_prob=fullmodprob;
01891 estimate->mod_prob_updated=true ;
01892
01893
01894 normalize();
01895 invalidate_model();
01896 }
01897
01898
01899 void CHMM::estimate_model_viterbi(CHMM* estimate)
01900 {
01901 int32_t i,j,t;
01902 float64_t sum;
01903 float64_t* P=ARRAYN1(0);
01904 float64_t* Q=ARRAYN2(0);
01905
01906 path_deriv_updated=false ;
01907
01908
01909 for (i=0; i<N; i++)
01910 {
01911 for (j=0; j<N; j++)
01912 set_A(i,j, PSEUDO);
01913
01914 for (j=0; j<M; j++)
01915 set_B(i,j, PSEUDO);
01916
01917 P[i]=PSEUDO;
01918 Q[i]=PSEUDO;
01919 }
01920
01921 float64_t allpatprob=0 ;
01922
01923 #ifdef USE_HMMPARALLEL
01924 int32_t num_threads = parallel->get_num_threads();
01925 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
01926 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
01927
01928 if (p_observations->get_num_vectors()<num_threads)
01929 num_threads=p_observations->get_num_vectors();
01930 #endif
01931
01932 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
01933 {
01934
01935 #ifdef USE_HMMPARALLEL
01936 if (dim%num_threads==0)
01937 {
01938 for (i=0; i<num_threads; i++)
01939 {
01940 if (dim+i<p_observations->get_num_vectors())
01941 {
01942 params[i].hmm=estimate ;
01943 params[i].dim=dim+i ;
01944 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
01945 }
01946 }
01947 for (i=0; i<num_threads; i++)
01948 {
01949 if (dim+i<p_observations->get_num_vectors())
01950 {
01951 pthread_join(threads[i], NULL);
01952 allpatprob += params[i].prob_sum;
01953 }
01954 }
01955 }
01956 #else
01957
01958 allpatprob += estimate->best_path(dim);
01959 #endif // USE_HMMPARALLEL
01960
01961
01962 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
01963 {
01964 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
01965 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
01966 }
01967
01968 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
01969
01970 P[estimate->PATH(dim)[0]]++;
01971 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
01972 }
01973
01974 #ifdef USE_HMMPARALLEL
01975 SG_FREE(threads);
01976 SG_FREE(params);
01977 #endif
01978
01979 allpatprob/=p_observations->get_num_vectors() ;
01980 estimate->all_pat_prob=allpatprob ;
01981 estimate->all_path_prob_updated=true ;
01982
01983
01984 for (i=0; i<N; i++)
01985 {
01986 sum=0;
01987 for (j=0; j<N; j++)
01988 sum+=get_A(i,j);
01989
01990 for (j=0; j<N; j++)
01991 set_a(i,j, log(get_A(i,j)/sum));
01992 }
01993
01994
01995 for (i=0; i<N; i++)
01996 {
01997 sum=0;
01998 for (j=0; j<M; j++)
01999 sum+=get_B(i,j);
02000
02001 for (j=0; j<M; j++)
02002 set_b(i,j, log(get_B(i, j)/sum));
02003 }
02004
02005
02006 sum=0;
02007 for (i=0; i<N; i++)
02008 sum+=P[i];
02009
02010 for (i=0; i<N; i++)
02011 set_p(i, log(P[i]/sum));
02012
02013
02014 sum=0;
02015 for (i=0; i<N; i++)
02016 sum+=Q[i];
02017
02018 for (i=0; i<N; i++)
02019 set_q(i, log(Q[i]/sum));
02020
02021
02022 invalidate_model();
02023 }
02024
02025
02026 void CHMM::estimate_model_viterbi_defined(CHMM* estimate)
02027 {
02028 int32_t i,j,k,t;
02029 float64_t sum;
02030 float64_t* P=ARRAYN1(0);
02031 float64_t* Q=ARRAYN2(0);
02032
02033 path_deriv_updated=false ;
02034
02035
02036 for (i=0; i<N; i++)
02037 {
02038 for (j=0; j<N; j++)
02039 set_A(i,j, PSEUDO);
02040
02041 for (j=0; j<M; j++)
02042 set_B(i,j, PSEUDO);
02043
02044 P[i]=PSEUDO;
02045 Q[i]=PSEUDO;
02046 }
02047
02048 #ifdef USE_HMMPARALLEL
02049 int32_t num_threads = parallel->get_num_threads();
02050 pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
02051 S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);
02052 #endif
02053
02054 float64_t allpatprob=0.0 ;
02055 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
02056 {
02057
02058 #ifdef USE_HMMPARALLEL
02059 if (dim%num_threads==0)
02060 {
02061 for (i=0; i<num_threads; i++)
02062 {
02063 if (dim+i<p_observations->get_num_vectors())
02064 {
02065 params[i].hmm=estimate ;
02066 params[i].dim=dim+i ;
02067 pthread_create(&threads[i], NULL, vit_dim_prefetch, (void*)¶ms[i]) ;
02068 }
02069 }
02070 for (i=0; i<num_threads; i++)
02071 {
02072 if (dim+i<p_observations->get_num_vectors())
02073 {
02074 pthread_join(threads[i], NULL);
02075 allpatprob += params[i].prob_sum;
02076 }
02077 }
02078 }
02079 #else // USE_HMMPARALLEL
02080
02081 allpatprob += estimate->best_path(dim);
02082 #endif // USE_HMMPARALLEL
02083
02084
02085
02086 for (t=0; t<p_observations->get_vector_length(dim)-1; t++)
02087 {
02088 set_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1], get_A(estimate->PATH(dim)[t], estimate->PATH(dim)[t+1])+1);
02089 set_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t), get_B(estimate->PATH(dim)[t], p_observations->get_feature(dim,t))+1);
02090 }
02091
02092 set_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1), get_B(estimate->PATH(dim)[p_observations->get_vector_length(dim)-1], p_observations->get_feature(dim,p_observations->get_vector_length(dim)-1)) + 1 );
02093
02094 P[estimate->PATH(dim)[0]]++;
02095 Q[estimate->PATH(dim)[p_observations->get_vector_length(dim)-1]]++;
02096 }
02097
02098 #ifdef USE_HMMPARALLEL
02099 SG_FREE(threads);
02100 SG_FREE(params);
02101 #endif
02102
02103
02104
02105
02106 allpatprob/=p_observations->get_num_vectors() ;
02107 estimate->all_pat_prob=allpatprob ;
02108 estimate->all_path_prob_updated=true ;
02109
02110
02111
02112 for (i=0; i<N; i++)
02113 {
02114 for (j=0; j<N; j++)
02115 set_a(i,j, estimate->get_a(i,j));
02116
02117 for (j=0; j<M; j++)
02118 set_b(i,j, estimate->get_b(i,j));
02119 }
02120
02121
02122 i=0;
02123 sum=0;
02124 j=model->get_learn_a(i,0);
02125 k=i;
02126 while (model->get_learn_a(i,0)!=-1 || k<i)
02127 {
02128 if (j==model->get_learn_a(i,0))
02129 {
02130 sum+=get_A(model->get_learn_a(i,0), model->get_learn_a(i,1));
02131 i++;
02132 }
02133 else
02134 {
02135 while (k<i)
02136 {
02137 set_a(model->get_learn_a(k,0), model->get_learn_a(k,1), log (get_A(model->get_learn_a(k,0), model->get_learn_a(k,1)) / sum));
02138 k++;
02139 }
02140
02141 sum=0;
02142 j=model->get_learn_a(i,0);
02143 k=i;
02144 }
02145 }
02146
02147
02148 i=0;
02149 sum=0;
02150 j=model->get_learn_b(i,0);
02151 k=i;
02152 while (model->get_learn_b(i,0)!=-1 || k<i)
02153 {
02154 if (j==model->get_learn_b(i,0))
02155 {
02156 sum+=get_B(model->get_learn_b(i,0),model->get_learn_b(i,1));
02157 i++;
02158 }
02159 else
02160 {
02161 while (k<i)
02162 {
02163 set_b(model->get_learn_b(k,0),model->get_learn_b(k,1), log (get_B(model->get_learn_b(k,0), model->get_learn_b(k,1)) / sum));
02164 k++;
02165 }
02166
02167 sum=0;
02168 j=model->get_learn_b(i,0);
02169 k=i;
02170 }
02171 }
02172
02173 i=0;
02174 sum=0;
02175 while (model->get_learn_p(i)!=-1)
02176 {
02177 sum+=P[model->get_learn_p(i)] ;
02178 i++ ;
02179 } ;
02180 i=0 ;
02181 while (model->get_learn_p(i)!=-1)
02182 {
02183 set_p(model->get_learn_p(i), log(P[model->get_learn_p(i)]/sum));
02184 i++ ;
02185 } ;
02186
02187 i=0;
02188 sum=0;
02189 while (model->get_learn_q(i)!=-1)
02190 {
02191 sum+=Q[model->get_learn_q(i)] ;
02192 i++ ;
02193 } ;
02194 i=0 ;
02195 while (model->get_learn_q(i)!=-1)
02196 {
02197 set_q(model->get_learn_q(i), log(Q[model->get_learn_q(i)]/sum));
02198 i++ ;
02199 } ;
02200
02201
02202
02203 invalidate_model();
02204 }
02205
02206
02207
//! Prints a summary of the model (likelihood, sizes) and, if `verbose`,
//! all parameters as probabilities (exp of the stored log values).
//! Each row's log-sum is printed as a "checksum": it should be ~0 for a
//! properly normalized distribution; deviations > 1e-5 are flagged.
//! @param verbose also dump a/b/p/q matrices when true
void CHMM::output_model(bool verbose)
{
	int32_t i,j;
	float64_t checksum;

	// generic info: likelihood and model/observation dimensions
	SG_INFO( "log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
			(float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
			N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));

	if (verbose)
	{
		// transition matrix a; checksum starts at q(i) so that
		// q(i) + sum_j a(i,j) == 1 holds for an absorbing-normalized row
		SG_INFO( "\ntransition matrix\n");
		for (i=0; i<N; i++)
		{
			checksum= get_q(i);
			for (j=0; j<N; j++)
			{
				checksum= CMath::logarithmic_sum(checksum, get_a(i,j));

				SG_INFO( "a(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_a(i,j)));

				if (j % 4 == 3)
					SG_PRINT( "\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}

		// start-state distribution p
		SG_INFO( "\ndistribution of start states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_p(i));
			SG_INFO( "p(%02i)=%1.4f ",i, (float32_t) exp(get_p(i)));
			if (i % 4 == 3)
				SG_PRINT( "\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// end-state distribution q
		SG_INFO( "\ndistribution of terminal states\n");
		checksum=-CMath::INFTY;
		for (i=0; i<N; i++)
		{
			checksum= CMath::logarithmic_sum(checksum, get_q(i));
			SG_INFO("q(%02i)=%1.4f ",i, (float32_t) exp(get_q(i)));
			if (i % 4 == 3)
				SG_INFO("\n");
		}
		if (fabs(checksum)>1e-5)
			SG_DEBUG( " checksum % E ******* \n",checksum);
		else
			SG_DEBUG( " checksum=% E\n", checksum);

		// emission matrix b; each row should sum to 1
		SG_INFO("\ndistribution of observations given the state\n");
		for (i=0; i<N; i++)
		{
			checksum=-CMath::INFTY;
			for (j=0; j<M; j++)
			{
				checksum=CMath::logarithmic_sum(checksum, get_b(i,j));
				SG_INFO("b(%02i,%02i)=%1.4f ",i,j, (float32_t) exp(get_b(i,j)));
				if (j % 4 == 3)
					SG_PRINT("\n");
			}
			if (fabs(checksum)>1e-5)
				SG_DEBUG( " checksum % E ******* \n",checksum);
			else
				SG_DEBUG( " checksum % E\n",checksum);
		}
	}
	SG_PRINT("\n");
}
02290
02291
02292 void CHMM::output_model_defined(bool verbose)
02293 {
02294 int32_t i,j;
02295 if (!model)
02296 return ;
02297
02298
02299 SG_INFO("log(Pr[O|model])=%e, #states: %i, #observationssymbols: %i, #observations: %ix%i\n",
02300 (float64_t)((p_observations) ? model_probability() : -CMath::INFTY),
02301 N, M, ((p_observations) ? p_observations->get_max_vector_length() : 0), ((p_observations) ? p_observations->get_num_vectors() : 0));
02302
02303 if (verbose)
02304 {
02305
02306 SG_INFO("\ntransition matrix\n");
02307
02308
02309 i=0;
02310 j=model->get_learn_a(i,0);
02311 while (model->get_learn_a(i,0)!=-1)
02312 {
02313 if (j!=model->get_learn_a(i,0))
02314 {
02315 j=model->get_learn_a(i,0);
02316 SG_PRINT("\n");
02317 }
02318
02319 SG_INFO("a(%02i,%02i)=%1.4f ",model->get_learn_a(i,0), model->get_learn_a(i,1), (float32_t) exp(get_a(model->get_learn_a(i,0), model->get_learn_a(i,1))));
02320 i++;
02321 }
02322
02323
02324 SG_INFO("\n\ndistribution of observations given the state\n");
02325 i=0;
02326 j=model->get_learn_b(i,0);
02327 while (model->get_learn_b(i,0)!=-1)
02328 {
02329 if (j!=model->get_learn_b(i,0))
02330 {
02331 j=model->get_learn_b(i,0);
02332 SG_PRINT("\n");
02333 }
02334
02335 SG_INFO("b(%02i,%02i)=%1.4f ",model->get_learn_b(i,0),model->get_learn_b(i,1), (float32_t) exp(get_b(model->get_learn_b(i,0),model->get_learn_b(i,1))));
02336 i++;
02337 }
02338
02339 SG_PRINT("\n");
02340 }
02341 SG_PRINT("\n");
02342 }
02343
02344
02345
02346
02347 void CHMM::convert_to_log()
02348 {
02349 int32_t i,j;
02350
02351 for (i=0; i<N; i++)
02352 {
02353 if (get_p(i)!=0)
02354 set_p(i, log(get_p(i)));
02355 else
02356 set_p(i, -CMath::INFTY);;
02357 }
02358
02359 for (i=0; i<N; i++)
02360 {
02361 if (get_q(i)!=0)
02362 set_q(i, log(get_q(i)));
02363 else
02364 set_q(i, -CMath::INFTY);;
02365 }
02366
02367 for (i=0; i<N; i++)
02368 {
02369 for (j=0; j<N; j++)
02370 {
02371 if (get_a(i,j)!=0)
02372 set_a(i,j, log(get_a(i,j)));
02373 else
02374 set_a(i,j, -CMath::INFTY);
02375 }
02376 }
02377
02378 for (i=0; i<N; i++)
02379 {
02380 for (j=0; j<M; j++)
02381 {
02382 if (get_b(i,j)!=0)
02383 set_b(i,j, log(get_b(i,j)));
02384 else
02385 set_b(i,j, -CMath::INFTY);
02386 }
02387 }
02388 loglikelihood=true;
02389
02390 invalidate_model();
02391 }
02392
02393
02394 void CHMM::init_model_random()
02395 {
02396 const float64_t MIN_RAND=23e-3;
02397
02398 float64_t sum;
02399 int32_t i,j;
02400
02401
02402 for (i=0; i<N; i++)
02403 {
02404 sum=0;
02405 for (j=0; j<N; j++)
02406 {
02407 set_a(i,j, CMath::random(MIN_RAND, 1.0));
02408
02409 sum+=get_a(i,j);
02410 }
02411
02412 for (j=0; j<N; j++)
02413 set_a(i,j, get_a(i,j)/sum);
02414 }
02415
02416
02417 sum=0;
02418 for (i=0; i<N; i++)
02419 {
02420 set_p(i, CMath::random(MIN_RAND, 1.0));
02421
02422 sum+=get_p(i);
02423 }
02424
02425 for (i=0; i<N; i++)
02426 set_p(i, get_p(i)/sum);
02427
02428
02429 sum=0;
02430 for (i=0; i<N; i++)
02431 {
02432 set_q(i, CMath::random(MIN_RAND, 1.0));
02433
02434 sum+=get_q(i);
02435 }
02436
02437 for (i=0; i<N; i++)
02438 set_q(i, get_q(i)/sum);
02439
02440
02441 for (i=0; i<N; i++)
02442 {
02443 sum=0;
02444 for (j=0; j<M; j++)
02445 {
02446 set_b(i,j, CMath::random(MIN_RAND, 1.0));
02447
02448 sum+=get_b(i,j);
02449 }
02450
02451 for (j=0; j<M; j++)
02452 set_b(i,j, get_b(i,j)/sum);
02453 }
02454
02455
02456 invalidate_model();
02457 }
02458
02459
void CHMM::init_model_defined()
{
	// Initialize the model according to the attached Model definition:
	// zero everything, give the learnable entries random values normalized
	// per group, and copy the constant entries verbatim. Works on plain
	// probabilities (not log-domain).
	int32_t i,j,k,r;
	float64_t sum;
	const float64_t MIN_RAND=23e-3;

	// start from all-zero parameters
	for (i=0; i<N; i++)
		for (j=0; j<N; j++)
			set_a(i,j, 0);

	for (i=0; i<N; i++)
		set_p(i, 0);

	for (i=0; i<N; i++)
		set_q(i, 0);

	for (i=0; i<N; i++)
		for (j=0; j<M; j++)
			set_b(i,j, 0);

	// learnable transitions: entries are grouped by source state (column 0
	// of learn_a). For each group, accumulate the random values in 'sum'
	// while i runs ahead; when the source state changes (or the -1
	// terminator is reached with k<i still pending), flush entries k..i-1
	// normalized by 'sum', then start the next group with fresh randoms.
	float64_t *R=SG_MALLOC(float64_t, N);
	for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=i;
	j=model->get_learn_a(i,0);
	while (model->get_learn_a(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_a(i,0))
		{
			sum+=R[model->get_learn_a(i,1)] ;
			i++;
		}
		else
		{
			// flush the finished group, normalized so it sums to one
			while (k<i)
			{
				set_a(model->get_learn_a(k,0), model->get_learn_a(k,1),
						R[model->get_learn_a(k,1)]/sum);
				k++;
			}
			j=model->get_learn_a(i,0);
			k=i;
			sum=0;
			for (r=0; r<N; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	SG_FREE(R); R=NULL ;

	// learnable emissions: same group-and-flush scheme, grouped by state
	// (column 0 of learn_b), with M random values per group.
	R=SG_MALLOC(float64_t, M);
	for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
	i=0; sum=0; k=0 ;
	j=model->get_learn_b(i,0);
	while (model->get_learn_b(i,0)!=-1 || k<i)
	{
		if (j==model->get_learn_b(i,0))
		{
			sum+=R[model->get_learn_b(i,1)] ;
			i++;
		}
		else
		{
			while (k<i)
			{
				set_b(model->get_learn_b(k,0),model->get_learn_b(k,1),
						R[model->get_learn_b(k,1)]/sum);
				k++;
			}

			j=model->get_learn_b(i,0);
			k=i;
			sum=0;
			for (r=0; r<M; r++) R[r]=CMath::random(MIN_RAND,1.0);
		}
	}
	SG_FREE(R); R=NULL ;

	// constant transitions: copied verbatim from the definition
	i=0;
	while (model->get_const_a(i,0) != -1)
	{
		set_a(model->get_const_a(i,0), model->get_const_a(i,1), model->get_const_a_val(i));
		i++;
	}

	// constant emissions
	i=0;
	while (model->get_const_b(i,0) != -1)
	{
		set_b(model->get_const_b(i,0), model->get_const_b(i,1), model->get_const_b_val(i));
		i++;
	}

	// constant start probabilities
	i=0;
	while (model->get_const_p(i) != -1)
	{
		set_p(model->get_const_p(i), model->get_const_p_val(i));
		i++;
	}

	// NOTE(review): q was already zeroed above; this second reset looks
	// redundant -- confirm it is intentional.
	for (i=0; i<N; i++)
		set_q(i, 0.0);

	// constant termination probabilities
	i=0;
	while (model->get_const_q(i) != -1)
	{
		set_q(model->get_const_q(i), model->get_const_q_val(i));
		i++;
	}

	// learnable start probabilities: random values, then normalize the
	// learnable subset so it sums to one
	i=0;
	sum=0;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_p(model->get_learn_p(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_p(i)!=-1)
	{
		set_p(model->get_learn_p(i), get_p(model->get_learn_p(i))/sum);
		i++ ;
	} ;

	// learnable termination probabilities: same two-pass normalization
	i=0;
	sum=0;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i),CMath::random(MIN_RAND,1.0)) ;
		sum+=get_q(model->get_learn_q(i)) ;
		i++ ;
	} ;
	i=0 ;
	while (model->get_learn_q(i)!=-1)
	{
		set_q(model->get_learn_q(i), get_q(model->get_learn_q(i))/sum);
		i++ ;
	} ;

	invalidate_model();
}
02613
02614 void CHMM::clear_model()
02615 {
02616 int32_t i,j;
02617 for (i=0; i<N; i++)
02618 {
02619 set_p(i, log(PSEUDO));
02620 set_q(i, log(PSEUDO));
02621
02622 for (j=0; j<N; j++)
02623 set_a(i,j, log(PSEUDO));
02624
02625 for (j=0; j<M; j++)
02626 set_b(i,j, log(PSEUDO));
02627 }
02628 }
02629
02630 void CHMM::clear_model_defined()
02631 {
02632 int32_t i,j,k;
02633
02634 for (i=0; (j=model->get_learn_p(i))!=-1; i++)
02635 set_p(j, log(PSEUDO));
02636
02637 for (i=0; (j=model->get_learn_q(i))!=-1; i++)
02638 set_q(j, log(PSEUDO));
02639
02640 for (i=0; (j=model->get_learn_a(i,0))!=-1; i++)
02641 {
02642 k=model->get_learn_a(i,1);
02643 set_a(j,k, log(PSEUDO));
02644 }
02645
02646 for (i=0; (j=model->get_learn_b(i,0))!=-1; i++)
02647 {
02648 k=model->get_learn_b(i,1);
02649 set_b(j,k, log(PSEUDO));
02650 }
02651 }
02652
02653 void CHMM::copy_model(CHMM* l)
02654 {
02655 int32_t i,j;
02656 for (i=0; i<N; i++)
02657 {
02658 set_p(i, l->get_p(i));
02659 set_q(i, l->get_q(i));
02660
02661 for (j=0; j<N; j++)
02662 set_a(i,j, l->get_a(i,j));
02663
02664 for (j=0; j<M; j++)
02665 set_b(i,j, l->get_b(i,j));
02666 }
02667 }
02668
void CHMM::invalidate_model()
{
	// Invalidate all cached quantities after a parameter change: the
	// cached model probability, the forward/backward caches and path
	// caches, and (if memory is initialized) rebuild the sparse
	// transition lists used by the forward/backward algorithms.
	this->mod_prob=0.0;
	this->mod_prob_updated=false;

	if (mem_initialized)
	{
		// free the old transition lists, including each per-state array
		if (trans_list_forward_cnt)
			SG_FREE(trans_list_forward_cnt);
		trans_list_forward_cnt=NULL ;
		if (trans_list_backward_cnt)
			SG_FREE(trans_list_backward_cnt);
		trans_list_backward_cnt=NULL ;
		if (trans_list_forward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_forward[i])
					SG_FREE(trans_list_forward[i]);
			SG_FREE(trans_list_forward);
			trans_list_forward=NULL ;
		}
		if (trans_list_backward)
		{
			for (int32_t i=0; i<trans_list_len; i++)
				if (trans_list_backward[i])
					SG_FREE(trans_list_backward[i]);
			SG_FREE(trans_list_backward);
			trans_list_backward = NULL ;
		} ;

		trans_list_len = N ;
		trans_list_forward = SG_MALLOC(T_STATES*, N);
		trans_list_forward_cnt = SG_MALLOC(T_STATES, N);

		// trans_list_forward[j] collects all states i with a transition
		// probability a(i,j) above ALMOST_NEG_INFTY (predecessors of j)
		for (int32_t j=0; j<N; j++)
		{
			trans_list_forward_cnt[j]= 0 ;
			trans_list_forward[j]= SG_MALLOC(T_STATES, N);
			for (int32_t i=0; i<N; i++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_forward[j][trans_list_forward_cnt[j]]=i ;
					trans_list_forward_cnt[j]++ ;
				}
		} ;

		trans_list_backward = SG_MALLOC(T_STATES*, N);
		trans_list_backward_cnt = SG_MALLOC(T_STATES, N);

		// trans_list_backward[i] collects all states j reachable from i
		// (successors of i), by the same threshold
		for (int32_t i=0; i<N; i++)
		{
			trans_list_backward_cnt[i]= 0 ;
			trans_list_backward[i]= SG_MALLOC(T_STATES, N);
			for (int32_t j=0; j<N; j++)
				if (get_a(i,j)>CMath::ALMOST_NEG_INFTY)
				{
					trans_list_backward[i][trans_list_backward_cnt[i]]=j ;
					trans_list_backward_cnt[i]++ ;
				}
		} ;
	} ;
	// invalidate path/derivative caches as well
	this->all_pat_prob=0.0;
	this->pat_prob=0.0;
	this->path_deriv_updated=false ;
	this->path_deriv_dimension=-1 ;
	this->all_path_prob_updated=false;

#ifdef USE_HMMPARALLEL_STRUCTURES
	{
		// one alpha/beta cache and path-probability slot per thread
		for (int32_t i=0; i<parallel->get_num_threads(); i++)
		{
			this->alpha_cache[i].updated=false;
			this->beta_cache[i].updated=false;
			path_prob_updated[i]=false ;
			path_prob_dimension[i]=-1 ;
		} ;
	}
#else // USE_HMMPARALLEL_STRUCTURES
	this->alpha_cache.updated=false;
	this->beta_cache.updated=false;
	this->path_prob_dimension=-1;
	this->path_prob_updated=false;

#endif // USE_HMMPARALLEL_STRUCTURES
}
02755
02756 void CHMM::open_bracket(FILE* file)
02757 {
02758 int32_t value;
02759 while (((value=fgetc(file)) != EOF) && (value!='['))
02760 {
02761 if (value=='\n')
02762 line++;
02763 }
02764
02765 if (value==EOF)
02766 error(line, "expected \"[\" in input file");
02767
02768 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02769 {
02770 if (value=='\n')
02771 line++;
02772 }
02773
02774 ungetc(value, file);
02775 }
02776
02777 void CHMM::close_bracket(FILE* file)
02778 {
02779 int32_t value;
02780 while (((value=fgetc(file)) != EOF) && (value!=']'))
02781 {
02782 if (value=='\n')
02783 line++;
02784 }
02785
02786 if (value==EOF)
02787 error(line, "expected \"]\" in input file");
02788 }
02789
02790 bool CHMM::comma_or_space(FILE* file)
02791 {
02792 int32_t value;
02793 while (((value=fgetc(file)) != EOF) && (value!=',') && (value!=';') && (value!=']'))
02794 {
02795 if (value=='\n')
02796 line++;
02797 }
02798 if (value==']')
02799 {
02800 ungetc(value, file);
02801 SG_ERROR( "found ']' instead of ';' or ','\n") ;
02802 return false ;
02803 } ;
02804
02805 if (value==EOF)
02806 error(line, "expected \";\" or \",\" in input file");
02807
02808 while (((value=fgetc(file)) != EOF) && (isspace(value)))
02809 {
02810 if (value=='\n')
02811 line++;
02812 }
02813 ungetc(value, file);
02814 return true ;
02815 }
02816
02817 bool CHMM::get_numbuffer(FILE* file, char* buffer, int32_t length)
02818 {
02819 signed char value;
02820
02821 while (((value=fgetc(file)) != EOF) &&
02822 !isdigit(value) && (value!='A')
02823 && (value!='C') && (value!='G') && (value!='T')
02824 && (value!='N') && (value!='n')
02825 && (value!='.') && (value!='-') && (value!='e') && (value!=']'))
02826 {
02827 if (value=='\n')
02828 line++;
02829 }
02830 if (value==']')
02831 {
02832 ungetc(value,file) ;
02833 return false ;
02834 } ;
02835 if (value!=EOF)
02836 {
02837 int32_t i=0;
02838 switch (value)
02839 {
02840 case 'A':
02841 value='0' +CAlphabet::B_A;
02842 break;
02843 case 'C':
02844 value='0' +CAlphabet::B_C;
02845 break;
02846 case 'G':
02847 value='0' +CAlphabet::B_G;
02848 break;
02849 case 'T':
02850 value='0' +CAlphabet::B_T;
02851 break;
02852 };
02853
02854 buffer[i++]=value;
02855
02856 while (((value=fgetc(file)) != EOF) &&
02857 (isdigit(value) || (value=='.') || (value=='-') || (value=='e')
02858 || (value=='A') || (value=='C') || (value=='G')|| (value=='T')
02859 || (value=='N') || (value=='n')) && (i<length))
02860 {
02861 switch (value)
02862 {
02863 case 'A':
02864 value='0' +CAlphabet::B_A;
02865 break;
02866 case 'C':
02867 value='0' +CAlphabet::B_C;
02868 break;
02869 case 'G':
02870 value='0' +CAlphabet::B_G;
02871 break;
02872 case 'T':
02873 value='0' +CAlphabet::B_T;
02874 break;
02875 case '1': case '2': case'3': case '4': case'5':
02876 case '6': case '7': case'8': case '9': case '0': break ;
02877 case '.': case 'e': case '-': break ;
02878 default:
02879 SG_ERROR( "found crap: %i %c (pos:%li)\n",i,value,ftell(file)) ;
02880 };
02881 buffer[i++]=value;
02882 }
02883 ungetc(value, file);
02884 buffer[i]='\0';
02885
02886 return (i<=length) && (i>0);
02887 }
02888 return false;
02889 }
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899
02900
02901
02902
02903
02904
02905
02906
02907
02908
02909
02910
02911
02912
02913
02914
02915
02916
02917
02918
02919
02920
02921
02922
02923
02924
02925
bool CHMM::load_model(FILE* file)
{
	// Parse a plain-text model file of the form
	//   N=<states> M=<symbols> a=[[..]..] b=[[..]..] p=[..] q=[..]
	// ('%' starts a comment line) using a small character-driven state
	// machine. The matrices are read as plain probabilities; no
	// normalization is performed (see the warning at the end).
	int32_t received_params=0;

	bool result=false;
	E_STATE state=INITIAL;
	char buffer[1024];

	line=1;
	int32_t i,j;

	if (file)
	{
		while (state!=END)
		{
			int32_t value=fgetc(file);

			if (value=='\n')
				line++;
			if (value==EOF)
				state=END;

			switch (state)
			{
				case INITIAL:
					if (value=='N')
					{
						// NOTE(review): message says "p double defined" but
						// refers to N -- looks copy-pasted
						if (received_params & GOTN)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_N;
					}
					else if (value=='M')
					{
						// NOTE(review): same copy-pasted "p" message for M
						if (received_params & GOTM)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_M;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					// deliberate fallthrough into ARRAYs: GOTO is never set
					// in this function, so the GET_N/GET_M transitions below
					// always return to INITIAL and the a/b/p/q keys are only
					// recognized via this fallthrough. Do NOT add a break.
				case ARRAYs:
					if (value=='p')
					{
						if (received_params & GOTp)
							error(line, "in model file: \"p double defined\"");
						else
							state=GET_p;
					}
					// NOTE(review): missing 'else' before this if -- harmless
					// because the conditions are mutually exclusive
					if (value=='q')
					{
						if (received_params & GOTq)
							error(line, "in model file: \"q double defined\"");
						else
							state=GET_q;
					}
					else if (value=='a')
					{
						if (received_params & GOTa)
							error(line, "in model file: \"a double defined\"");
						else
							state=GET_a;
					}
					else if (value=='b')
					{
						if (received_params & GOTb)
							error(line, "in model file: \"b double defined\"");
						else
							state=GET_b;
					}
					else if (value=='%')
					{
						state=COMMENT;
					}
					break;
				case GET_N:
					// expect "=<int>"; sets the number of states
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))
						{
							this->N= atoi(buffer);
							received_params|=GOTN;
							// NOTE(review): GOTO is never set here, so this
							// always selects INITIAL (see fallthrough above)
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;
					}
					break;
				case GET_M:
					// expect "=<int>"; sets the number of observation symbols
					if (value=='=')
					{
						if (get_numbuffer(file, buffer, 4))
						{
							this->M= atoi(buffer);
							received_params|=GOTM;
							state= (received_params == (GOTN | GOTM | GOTO)) ? ARRAYs : INITIAL;
						}
						else
							state=END;
					}
					break;
				case GET_a:
					// expect "=[[row0],[row1],...]": NxN transition matrix
					if (value=='=')
					{
						float64_t f;

						transition_matrix_a=SG_MALLOC(float64_t, N*N);
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->N ; j++)
							{

								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_a(i,j, f);

								if (j<this->N-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTa;
					}
					// NOTE(review): exact-equality test can never be true
					// once GOTN/GOTM are set; the masked form used in GET_b
					// below is the one that actually ends parsing -- confirm
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_b:
					// expect "=[[row0],[row1],...]": NxM emission matrix
					if (value=='=')
					{
						float64_t f;

						observation_matrix_b=SG_MALLOC(float64_t, N*M);
						open_bracket(file);
						for (i=0; i<this->N; i++)
						{
							open_bracket(file);

							for (j=0; j<this->M ; j++)
							{

								if (fscanf( file, "%le", &f ) != 1)
									error(line, "float64_t expected");
								else
									set_b(i,j, f);

								if (j<this->M-1)
									comma_or_space(file);
								else
									close_bracket(file);
							}

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTb;
					}
					state= ((received_params & (GOTa | GOTb | GOTp | GOTq)) == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_p:
					// expect "=[..]": start distribution over the N states
					if (value=='=')
					{
						float64_t f;

						initial_state_distribution_p=SG_MALLOC(float64_t, N);
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_p(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTp;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case GET_q:
					// expect "=[..]": termination distribution over N states
					if (value=='=')
					{
						float64_t f;

						end_state_distribution_q=SG_MALLOC(float64_t, N);
						open_bracket(file);
						for (i=0; i<this->N ; i++)
						{
							if (fscanf( file, "%le", &f ) != 1)
								error(line, "float64_t expected");
							else
								set_q(i, f);

							if (i<this->N-1)
								comma_or_space(file);
							else
								close_bracket(file);
						}
						received_params|=GOTq;
					}
					state= (received_params == (GOTa | GOTb | GOTp | GOTq)) ? END : ARRAYs;
					break;
				case COMMENT:
					// skip the rest of the comment line
					// NOTE(review): the top of the loop already counted this
					// '\n', so comment lines are counted twice -- confirm
					if (value==EOF)
						state=END;
					else if (value=='\n')
					{
						line++;
						state=INITIAL;
					}
					break;

				default:
					break;
			}
		}
		// NOTE(review): GOTO is never set in this function, so this appears
		// to always yield false -- possibly a leftover from removed 'O='
		// (alphabet/observation) handling; verify against version history.
		result= (received_params== (GOTa | GOTb | GOTp | GOTq | GOTN | GOTM | GOTO));
	}

	SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
	return result;
}
03164
03165
03166
03167
03168
03169
03170
03171
03172
03173
03174
03175
03176
03177
03178
03179
03180
03181
03182
03183
03184
03185
03186
03187
03188
03189
03190
03191
03192
03193
03194
03195
03196
03197
03198
03199
03200
03201
03202
03203
03204
03205
03206
03207
03208
03209
03210
03211
03212
03213
03214
03215
03216
03217
03218
03219
03220
03221
03222
03223
03224 bool CHMM::load_definitions(FILE* file, bool verbose, bool _initialize)
03225 {
03226 if (model)
03227 delete model ;
03228 model=new Model();
03229
03230 int32_t received_params=0x0000000;
03231 char buffer[1024];
03232
03233 bool result=false;
03234 E_STATE state=INITIAL;
03235
03236 {
03237 model->set_learn_a(0, -1);
03238 model->set_learn_a(1, -1);
03239 model->set_const_a(0, -1);
03240 model->set_const_a(1, -1);
03241 model->set_const_a_val(0, 1.0);
03242 model->set_learn_b(0, -1);
03243 model->set_const_b(0, -1);
03244 model->set_const_b_val(0, 1.0);
03245 model->set_learn_p(0, -1);
03246 model->set_learn_q(0, -1);
03247 model->set_const_p(0, -1);
03248 model->set_const_q(0, -1);
03249 } ;
03250
03251 line=1;
03252
03253 if (file)
03254 {
03255 while (state!=END)
03256 {
03257 int32_t value=fgetc(file);
03258
03259 if (value=='\n')
03260 line++;
03261
03262 if (value==EOF)
03263 state=END;
03264
03265 switch (state)
03266 {
03267 case INITIAL:
03268 if (value=='l')
03269 {
03270 if (fgetc(file)=='e' && fgetc(file)=='a' && fgetc(file)=='r' && fgetc(file)=='n' && fgetc(file)=='_')
03271 {
03272 switch(fgetc(file))
03273 {
03274 case 'a':
03275 state=GET_learn_a;
03276 break;
03277 case 'b':
03278 state=GET_learn_b;
03279 break;
03280 case 'p':
03281 state=GET_learn_p;
03282 break;
03283 case 'q':
03284 state=GET_learn_q;
03285 break;
03286 default:
03287 error(line, "a,b,p or q expected in train definition file");
03288 };
03289 }
03290 }
03291 else if (value=='c')
03292 {
03293 if (fgetc(file)=='o' && fgetc(file)=='n' && fgetc(file)=='s'
03294 && fgetc(file)=='t' && fgetc(file)=='_')
03295 {
03296 switch(fgetc(file))
03297 {
03298 case 'a':
03299 state=GET_const_a;
03300 break;
03301 case 'b':
03302 state=GET_const_b;
03303 break;
03304 case 'p':
03305 state=GET_const_p;
03306 break;
03307 case 'q':
03308 state=GET_const_q;
03309 break;
03310 default:
03311 error(line, "a,b,p or q expected in train definition file");
03312 };
03313 }
03314 }
03315 else if (value=='%')
03316 {
03317 state=COMMENT;
03318 }
03319 else if (value==EOF)
03320 {
03321 state=END;
03322 }
03323 break;
03324 case GET_learn_a:
03325 if (value=='=')
03326 {
03327 open_bracket(file);
03328 bool finished=false;
03329 int32_t i=0;
03330
03331 if (verbose)
03332 SG_DEBUG( "\nlearn for transition matrix: ") ;
03333 while (!finished)
03334 {
03335 open_bracket(file);
03336
03337 if (get_numbuffer(file, buffer, 4))
03338 {
03339 value=atoi(buffer);
03340 model->set_learn_a(i++, value);
03341
03342 if (value<0)
03343 {
03344 finished=true;
03345 break;
03346 }
03347 if (value>=N)
03348 SG_ERROR( "invalid value for learn_a(%i,0): %i\n",i/2,(int)value) ;
03349 }
03350 else
03351 break;
03352
03353 comma_or_space(file);
03354
03355 if (get_numbuffer(file, buffer, 4))
03356 {
03357 value=atoi(buffer);
03358 model->set_learn_a(i++, value);
03359
03360 if (value<0)
03361 {
03362 finished=true;
03363 break;
03364 }
03365 if (value>=N)
03366 SG_ERROR( "invalid value for learn_a(%i,1): %i\n",i/2-1,(int)value) ;
03367
03368 }
03369 else
03370 break;
03371 close_bracket(file);
03372 }
03373 close_bracket(file);
03374 if (verbose)
03375 SG_DEBUG( "%i Entries",(int)(i/2)) ;
03376
03377 if (finished)
03378 {
03379 received_params|=GOTlearn_a;
03380
03381 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03382 }
03383 else
03384 state=END;
03385 }
03386 break;
03387 case GET_learn_b:
03388 if (value=='=')
03389 {
03390 open_bracket(file);
03391 bool finished=false;
03392 int32_t i=0;
03393
03394 if (verbose)
03395 SG_DEBUG( "\nlearn for emission matrix: ") ;
03396
03397 while (!finished)
03398 {
03399 open_bracket(file);
03400
03401 int32_t combine=0;
03402
03403 for (int32_t j=0; j<2; j++)
03404 {
03405 if (get_numbuffer(file, buffer, 4))
03406 {
03407 value=atoi(buffer);
03408
03409 if (j==0)
03410 {
03411 model->set_learn_b(i++, value);
03412
03413 if (value<0)
03414 {
03415 finished=true;
03416 break;
03417 }
03418 if (value>=N)
03419 SG_ERROR( "invalid value for learn_b(%i,0): %i\n",i/2,(int)value) ;
03420 }
03421 else
03422 combine=value;
03423 }
03424 else
03425 break;
03426
03427 if (j<1)
03428 comma_or_space(file);
03429 else
03430 close_bracket(file);
03431 }
03432 model->set_learn_b(i++, combine);
03433 if (combine>=M)
03434
03435 SG_ERROR("invalid value for learn_b(%i,1): %i\n",i/2-1,(int)value) ;
03436 }
03437 close_bracket(file);
03438 if (verbose)
03439 SG_DEBUG( "%i Entries",(int)(i/2-1)) ;
03440
03441 if (finished)
03442 {
03443 received_params|=GOTlearn_b;
03444 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03445 }
03446 else
03447 state=END;
03448 }
03449 break;
03450 case GET_learn_p:
03451 if (value=='=')
03452 {
03453 open_bracket(file);
03454 bool finished=false;
03455 int32_t i=0;
03456
03457 if (verbose)
03458 SG_DEBUG( "\nlearn start states: ") ;
03459 while (!finished)
03460 {
03461 if (get_numbuffer(file, buffer, 4))
03462 {
03463 value=atoi(buffer);
03464
03465 model->set_learn_p(i++, value);
03466
03467 if (value<0)
03468 {
03469 finished=true;
03470 break;
03471 }
03472 if (value>=N)
03473 SG_ERROR( "invalid value for learn_p(%i): %i\n",i-1,(int)value) ;
03474 }
03475 else
03476 break;
03477
03478 comma_or_space(file);
03479 }
03480
03481 close_bracket(file);
03482 if (verbose)
03483 SG_DEBUG( "%i Entries",i-1) ;
03484
03485 if (finished)
03486 {
03487 received_params|=GOTlearn_p;
03488 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03489 }
03490 else
03491 state=END;
03492 }
03493 break;
03494 case GET_learn_q:
03495 if (value=='=')
03496 {
03497 open_bracket(file);
03498 bool finished=false;
03499 int32_t i=0;
03500
03501 if (verbose)
03502 SG_DEBUG( "\nlearn terminal states: ") ;
03503 while (!finished)
03504 {
03505 if (get_numbuffer(file, buffer, 4))
03506 {
03507 value=atoi(buffer);
03508 model->set_learn_q(i++, value);
03509
03510 if (value<0)
03511 {
03512 finished=true;
03513 break;
03514 }
03515 if (value>=N)
03516 SG_ERROR( "invalid value for learn_q(%i): %i\n",i-1,(int)value) ;
03517 }
03518 else
03519 break;
03520
03521 comma_or_space(file);
03522 }
03523
03524 close_bracket(file);
03525 if (verbose)
03526 SG_DEBUG( "%i Entries",i-1) ;
03527
03528 if (finished)
03529 {
03530 received_params|=GOTlearn_q;
03531 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03532 }
03533 else
03534 state=END;
03535 }
03536 break;
03537 case GET_const_a:
03538 if (value=='=')
03539 {
03540 open_bracket(file);
03541 bool finished=false;
03542 int32_t i=0;
03543
03544 if (verbose)
03545 #ifdef USE_HMMDEBUG
03546 SG_DEBUG( "\nconst for transition matrix: \n") ;
03547 #else
03548 SG_DEBUG( "\nconst for transition matrix: ") ;
03549 #endif
03550 while (!finished)
03551 {
03552 open_bracket(file);
03553
03554 if (get_numbuffer(file, buffer, 4))
03555 {
03556 value=atoi(buffer);
03557 model->set_const_a(i++, value);
03558
03559 if (value<0)
03560 {
03561 finished=true;
03562 model->set_const_a(i++, value);
03563 model->set_const_a_val((int32_t)i/2 - 1, value);
03564 break;
03565 }
03566 if (value>=N)
03567 SG_ERROR( "invalid value for const_a(%i,0): %i\n",i/2,(int)value) ;
03568 }
03569 else
03570 break;
03571
03572 comma_or_space(file);
03573
03574 if (get_numbuffer(file, buffer, 4))
03575 {
03576 value=atoi(buffer);
03577 model->set_const_a(i++, value);
03578
03579 if (value<0)
03580 {
03581 finished=true;
03582 model->set_const_a_val((int32_t)i/2 - 1, value);
03583 break;
03584 }
03585 if (value>=N)
03586 SG_ERROR( "invalid value for const_a(%i,1): %i\n",i/2-1,(int)value) ;
03587 }
03588 else
03589 break;
03590
03591 if (!comma_or_space(file))
03592 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03593 else
03594 if (get_numbuffer(file, buffer, 10))
03595 {
03596 float64_t dvalue=atof(buffer);
03597 model->set_const_a_val((int32_t)i/2 - 1, dvalue);
03598 if (dvalue<0)
03599 {
03600 finished=true;
03601 break;
03602 }
03603 if ((dvalue>1.0) || (dvalue<0.0))
03604 SG_ERROR( "invalid value for const_a_val(%i): %e\n",(int)i/2-1,dvalue) ;
03605 }
03606 else
03607 model->set_const_a_val((int32_t)i/2 - 1, 1.0);
03608
03609 #ifdef USE_HMMDEBUG
03610 if (verbose)
03611 SG_ERROR("const_a(%i,%i)=%e\n", model->get_const_a((int32_t)i/2-1,0),model->get_const_a((int32_t)i/2-1,1),model->get_const_a_val((int32_t)i/2-1)) ;
03612 #endif
03613 close_bracket(file);
03614 }
03615 close_bracket(file);
03616 if (verbose)
03617 SG_DEBUG( "%i Entries",(int)i/2-1) ;
03618
03619 if (finished)
03620 {
03621 received_params|=GOTconst_a;
03622 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03623 }
03624 else
03625 state=END;
03626 }
03627 break;
03628
03629 case GET_const_b:
03630 if (value=='=')
03631 {
03632 open_bracket(file);
03633 bool finished=false;
03634 int32_t i=0;
03635
03636 if (verbose)
03637 #ifdef USE_HMMDEBUG
03638 SG_DEBUG( "\nconst for emission matrix: \n") ;
03639 #else
03640 SG_DEBUG( "\nconst for emission matrix: ") ;
03641 #endif
03642 while (!finished)
03643 {
03644 open_bracket(file);
03645 int32_t combine=0;
03646 for (int32_t j=0; j<3; j++)
03647 {
03648 if (get_numbuffer(file, buffer, 10))
03649 {
03650 if (j==0)
03651 {
03652 value=atoi(buffer);
03653
03654 model->set_const_b(i++, value);
03655
03656 if (value<0)
03657 {
03658 finished=true;
03659
03660 break;
03661 }
03662 if (value>=N)
03663 SG_ERROR( "invalid value for const_b(%i,0): %i\n",i/2-1,(int)value) ;
03664 }
03665 else if (j==2)
03666 {
03667 float64_t dvalue=atof(buffer);
03668 model->set_const_b_val((int32_t)(i-1)/2, dvalue);
03669 if (dvalue<0)
03670 {
03671 finished=true;
03672 break;
03673 } ;
03674 if ((dvalue>1.0) || (dvalue<0.0))
03675 SG_ERROR( "invalid value for const_b_val(%i,1): %e\n",i/2-1,dvalue) ;
03676 }
03677 else
03678 {
03679 value=atoi(buffer);
03680 combine= value;
03681 } ;
03682 }
03683 else
03684 {
03685 if (j==2)
03686 model->set_const_b_val((int32_t)(i-1)/2, 1.0);
03687 break;
03688 } ;
03689 if (j<2)
03690 if ((!comma_or_space(file)) && (j==1))
03691 {
03692 model->set_const_b_val((int32_t)(i-1)/2, 1.0) ;
03693 break ;
03694 } ;
03695 }
03696 close_bracket(file);
03697 model->set_const_b(i++, combine);
03698 if (combine>=M)
03699 SG_ERROR("invalid value for const_b(%i,1): %i\n",i/2-1, combine) ;
03700 #ifdef USE_HMMDEBUG
03701 if (verbose && !finished)
03702 SG_ERROR("const_b(%i,%i)=%e\n", model->get_const_b((int32_t)i/2-1,0),model->get_const_b((int32_t)i/2-1,1),model->get_const_b_val((int32_t)i/2-1)) ;
03703 #endif
03704 }
03705 close_bracket(file);
03706 if (verbose)
03707 SG_ERROR( "%i Entries",(int)i/2-1) ;
03708
03709 if (finished)
03710 {
03711 received_params|=GOTconst_b;
03712 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03713 }
03714 else
03715 state=END;
03716 }
03717 break;
03718 case GET_const_p:
03719 if (value=='=')
03720 {
03721 open_bracket(file);
03722 bool finished=false;
03723 int32_t i=0;
03724
03725 if (verbose)
03726 #ifdef USE_HMMDEBUG
03727 SG_DEBUG( "\nconst for start states: \n") ;
03728 #else
03729 SG_DEBUG( "\nconst for start states: ") ;
03730 #endif
03731 while (!finished)
03732 {
03733 open_bracket(file);
03734
03735 if (get_numbuffer(file, buffer, 4))
03736 {
03737 value=atoi(buffer);
03738 model->set_const_p(i, value);
03739
03740 if (value<0)
03741 {
03742 finished=true;
03743 model->set_const_p_val(i++, value);
03744 break;
03745 }
03746 if (value>=N)
03747 SG_ERROR( "invalid value for const_p(%i): %i\n",i,(int)value) ;
03748
03749 }
03750 else
03751 break;
03752
03753 if (!comma_or_space(file))
03754 model->set_const_p_val(i++, 1.0);
03755 else
03756 if (get_numbuffer(file, buffer, 10))
03757 {
03758 float64_t dvalue=atof(buffer);
03759 model->set_const_p_val(i++, dvalue);
03760 if (dvalue<0)
03761 {
03762 finished=true;
03763 break;
03764 }
03765 if ((dvalue>1) || (dvalue<0))
03766 SG_ERROR( "invalid value for const_p_val(%i): %e\n",i,dvalue) ;
03767 }
03768 else
03769 model->set_const_p_val(i++, 1.0);
03770
03771 close_bracket(file);
03772
03773 #ifdef USE_HMMDEBUG
03774 if (verbose)
03775 SG_DEBUG("const_p(%i)=%e\n", model->get_const_p(i-1),model->get_const_p_val(i-1)) ;
03776 #endif
03777 }
03778 if (verbose)
03779 SG_DEBUG( "%i Entries",i-1) ;
03780
03781 close_bracket(file);
03782
03783 if (finished)
03784 {
03785 received_params|=GOTconst_p;
03786 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03787 }
03788 else
03789 state=END;
03790 }
03791 break;
03792 case GET_const_q:
03793 if (value=='=')
03794 {
03795 open_bracket(file);
03796 bool finished=false;
03797 if (verbose)
03798 #ifdef USE_HMMDEBUG
03799 SG_DEBUG( "\nconst for terminal states: \n") ;
03800 #else
03801 SG_DEBUG( "\nconst for terminal states: ") ;
03802 #endif
03803 int32_t i=0;
03804
03805 while (!finished)
03806 {
03807 open_bracket(file) ;
03808 if (get_numbuffer(file, buffer, 4))
03809 {
03810 value=atoi(buffer);
03811 model->set_const_q(i, value);
03812 if (value<0)
03813 {
03814 finished=true;
03815 model->set_const_q_val(i++, value);
03816 break;
03817 }
03818 if (value>=N)
03819 SG_ERROR( "invalid value for const_q(%i): %i\n",i,(int)value) ;
03820 }
03821 else
03822 break;
03823
03824 if (!comma_or_space(file))
03825 model->set_const_q_val(i++, 1.0);
03826 else
03827 if (get_numbuffer(file, buffer, 10))
03828 {
03829 float64_t dvalue=atof(buffer);
03830 model->set_const_q_val(i++, dvalue);
03831 if (dvalue<0)
03832 {
03833 finished=true;
03834 break;
03835 }
03836 if ((dvalue>1) || (dvalue<0))
03837 SG_ERROR("invalid value for const_q_val(%i): %e\n",i,(double) dvalue) ;
03838 }
03839 else
03840 model->set_const_q_val(i++, 1.0);
03841
03842 close_bracket(file);
03843 #ifdef USE_HMMDEBUG
03844 if (verbose)
03845 SG_DEBUG("const_q(%i)=%e\n", model->get_const_q(i-1),model->get_const_q_val(i-1)) ;
03846 #endif
03847 }
03848 if (verbose)
03849 SG_DEBUG( "%i Entries",i-1) ;
03850
03851 close_bracket(file);
03852
03853 if (finished)
03854 {
03855 received_params|=GOTconst_q;
03856 state= (received_params == (GOTlearn_a | GOTlearn_b | GOTlearn_p | GOTlearn_q |GOTconst_a | GOTconst_b | GOTconst_p | GOTconst_q)) ? END : INITIAL;
03857 }
03858 else
03859 state=END;
03860 }
03861 break;
03862 case COMMENT:
03863 if (value==EOF)
03864 state=END;
03865 else if (value=='\n')
03866 state=INITIAL;
03867 break;
03868
03869 default:
03870 break;
03871 }
03872 }
03873 }
03874
03875
03876
03877
03878
03879 result=1 ;
03880 if (result)
03881 {
03882 model->sort_learn_a() ;
03883 model->sort_learn_b() ;
03884 if (_initialize)
03885 {
03886 init_model_defined(); ;
03887 convert_to_log();
03888 } ;
03889 }
03890 if (verbose)
03891 SG_DEBUG( "\n") ;
03892 return result;
03893 }
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904
03905
03906
03907
03908
03909
03910
03911
03912
03913
03914
03915
03916
03917
03918
03919
03920
03921
03922
03923
03924
03925
03926
03927
03928
03929 bool CHMM::save_model(FILE* file)
03930 {
03931 bool result=false;
03932 int32_t i,j;
03933 const float32_t NAN_REPLACEMENT = (float32_t) CMath::ALMOST_NEG_INFTY ;
03934
03935 if (file)
03936 {
03937 fprintf(file,"%s","% HMM - specification\n% N - number of states\n% M - number of observation_tokens\n% a is state_transition_matrix\n% size(a)= [N,N]\n%\n% b is observation_per_state_matrix\n% size(b)= [N,M]\n%\n% p is initial distribution\n% size(p)= [1, N]\n\n% q is distribution of end states\n% size(q)= [1, N]\n");
03938 fprintf(file,"N=%d;\n",N);
03939 fprintf(file,"M=%d;\n",M);
03940
03941 fprintf(file,"p=[");
03942 for (i=0; i<N; i++)
03943 {
03944 if (i<N-1) {
03945 if (CMath::is_finite(get_p(i)))
03946 fprintf(file, "%e,", (double)get_p(i));
03947 else
03948 fprintf(file, "%f,", NAN_REPLACEMENT);
03949 }
03950 else {
03951 if (CMath::is_finite(get_p(i)))
03952 fprintf(file, "%e", (double)get_p(i));
03953 else
03954 fprintf(file, "%f", NAN_REPLACEMENT);
03955 }
03956 }
03957
03958 fprintf(file,"];\n\nq=[");
03959 for (i=0; i<N; i++)
03960 {
03961 if (i<N-1) {
03962 if (CMath::is_finite(get_q(i)))
03963 fprintf(file, "%e,", (double)get_q(i));
03964 else
03965 fprintf(file, "%f,", NAN_REPLACEMENT);
03966 }
03967 else {
03968 if (CMath::is_finite(get_q(i)))
03969 fprintf(file, "%e", (double)get_q(i));
03970 else
03971 fprintf(file, "%f", NAN_REPLACEMENT);
03972 }
03973 }
03974 fprintf(file,"];\n\na=[");
03975
03976 for (i=0; i<N; i++)
03977 {
03978 fprintf(file, "\t[");
03979
03980 for (j=0; j<N; j++)
03981 {
03982 if (j<N-1) {
03983 if (CMath::is_finite(get_a(i,j)))
03984 fprintf(file, "%e,", (double)get_a(i,j));
03985 else
03986 fprintf(file, "%f,", NAN_REPLACEMENT);
03987 }
03988 else {
03989 if (CMath::is_finite(get_a(i,j)))
03990 fprintf(file, "%e];\n", (double)get_a(i,j));
03991 else
03992 fprintf(file, "%f];\n", NAN_REPLACEMENT);
03993 }
03994 }
03995 }
03996
03997 fprintf(file," ];\n\nb=[");
03998
03999 for (i=0; i<N; i++)
04000 {
04001 fprintf(file, "\t[");
04002
04003 for (j=0; j<M; j++)
04004 {
04005 if (j<M-1) {
04006 if (CMath::is_finite(get_b(i,j)))
04007 fprintf(file, "%e,", (double)get_b(i,j));
04008 else
04009 fprintf(file, "%f,", NAN_REPLACEMENT);
04010 }
04011 else {
04012 if (CMath::is_finite(get_b(i,j)))
04013 fprintf(file, "%e];\n", (double)get_b(i,j));
04014 else
04015 fprintf(file, "%f];\n", NAN_REPLACEMENT);
04016 }
04017 }
04018 }
04019 result= (fprintf(file," ];\n") == 5);
04020 }
04021
04022 return result;
04023 }
04024
04025 T_STATES* CHMM::get_path(int32_t dim, float64_t& prob)
04026 {
04027 T_STATES* result = NULL;
04028
04029 prob = best_path(dim);
04030 result = SG_MALLOC(T_STATES, p_observations->get_vector_length(dim));
04031
04032 for (int32_t i=0; i<p_observations->get_vector_length(dim); i++)
04033 result[i]=PATH(dim)[i];
04034
04035 return result;
04036 }
04037
04038 bool CHMM::save_path(FILE* file)
04039 {
04040 bool result=false;
04041
04042 if (file)
04043 {
04044 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04045 {
04046 if (dim%100==0)
04047 SG_PRINT( "%i..", dim) ;
04048 float64_t prob = best_path(dim);
04049 fprintf(file,"%i. path probability:%e\nstate sequence:\n", dim, prob);
04050 for (int32_t i=0; i<p_observations->get_vector_length(dim)-1; i++)
04051 fprintf(file,"%d ", PATH(dim)[i]);
04052 fprintf(file,"%d", PATH(dim)[p_observations->get_vector_length(dim)-1]);
04053 fprintf(file,"\n\n") ;
04054 }
04055 SG_DONE();
04056 result=true;
04057 }
04058
04059 return result;
04060 }
04061
04062 bool CHMM::save_likelihood_bin(FILE* file)
04063 {
04064 bool result=false;
04065
04066 if (file)
04067 {
04068 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04069 {
04070 float32_t prob= (float32_t) model_probability(dim);
04071 fwrite(&prob, sizeof(float32_t), 1, file);
04072 }
04073 result=true;
04074 }
04075
04076 return result;
04077 }
04078
04079 bool CHMM::save_likelihood(FILE* file)
04080 {
04081 bool result=false;
04082
04083 if (file)
04084 {
04085 fprintf(file, "%% likelihood of model per observation\n%% P[O|model]=[ P[O|model]_1 P[O|model]_2 ... P[O|model]_dim ]\n%%\n");
04086
04087 fprintf(file, "P=[");
04088 for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
04089 fprintf(file, "%e ", (double) model_probability(dim));
04090
04091 fprintf(file,"];");
04092 result=true;
04093 }
04094
04095 return result;
04096 }
04097
04098 #define FLOATWRITE(file, value) { float32_t rrr=float32_t(value); fwrite(&rrr, sizeof(float32_t), 1, file); num_floats++;}
04099
bool CHMM::save_model_bin(FILE* file)
{
	// Writes the model parameters as a raw float32 stream (FLOATWRITE).
	// Sections are introduced by an INFTY marker followed by a section id
	// (1: all parameters, 2: learnable parameters only, 3: trailer with
	// parameter counts and model dimensions).  Always returns true.
	int32_t i,j,q, num_floats=0 ;
	if (!model)
	{
		// No model definition loaded: dump ALL parameters (p, q, a, b).
		if (file)
		{
			// marker: full-parameter section
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 1);

			// initial state distribution p
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_p(i));
			SG_INFO( "wrote %i parameters for p\n",N) ;

			// end state distribution q
			for (i=0; i<N; i++)
				FLOATWRITE(file, get_q(i)) ;
			SG_INFO( "wrote %i parameters for q\n",N) ;

			// transition matrix a, row by row
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					FLOATWRITE(file, get_a(i,j));
			SG_INFO( "wrote %i parameters for a\n",N*N) ;

			// emission matrix b, row by row
			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					FLOATWRITE(file, get_b(i,j));
			SG_INFO( "wrote %i parameters for b\n",N*M) ;

			// trailer marker
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// per-section counts followed by model dimensions N, M
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) N*N);
			FLOATWRITE(file, (float32_t) N*M);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	}
	else
	{
		// A model definition restricts training to selected parameters;
		// only those are written.  a/b entries are prefixed with their
		// (i,j) coordinates.
		if (file)
		{
			int32_t num_p, num_q, num_a, num_b ;

			// marker: learnable-parameters-only section
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 2);

			// learn_p/learn_q lists are terminated by a negative index
			for (i=0; model->get_learn_p(i)>=0; i++)
				FLOATWRITE(file, get_p(model->get_learn_p(i)));
			num_p=i ;
			SG_INFO( "wrote %i parameters for p\n",num_p) ;

			for (i=0; model->get_learn_q(i)>=0; i++)
				FLOATWRITE(file, get_q(model->get_learn_q(i)));
			num_q=i ;
			SG_INFO( "wrote %i parameters for q\n",num_q) ;

			// NOTE(review): termination tests column 1 here, while
			// save_path_derivatives_bin tests column 0 for learn_a --
			// presumably both columns end with -1 together; verify
			// against the Model learn_a layout.
			for (q=0; model->get_learn_a(q,1)>=0; q++)
			{
				i=model->get_learn_a(q,0) ;
				j=model->get_learn_a(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_a(i,j));
			} ;
			num_a=q ;
			SG_INFO( "wrote %i parameters for a\n",num_a) ;

			for (q=0; model->get_learn_b(q,0)>=0; q++)
			{
				i=model->get_learn_b(q,0) ;
				j=model->get_learn_b(q,1) ;
				FLOATWRITE(file, (float32_t)i);
				FLOATWRITE(file, (float32_t)j);
				FLOATWRITE(file, get_b(i,j));
			} ;
			num_b=q ;
			SG_INFO( "wrote %i parameters for b\n",num_b) ;

			// trailer marker
			FLOATWRITE(file, (float32_t)CMath::INFTY);
			FLOATWRITE(file, (float32_t) 3);

			// per-section counts followed by model dimensions N, M
			FLOATWRITE(file, (float32_t) num_p);
			FLOATWRITE(file, (float32_t) num_q);
			FLOATWRITE(file, (float32_t) num_a);
			FLOATWRITE(file, (float32_t) num_b);
			FLOATWRITE(file, (float32_t) N);
			FLOATWRITE(file, (float32_t) M);
		} ;
	} ;
	return true ;
}
04201
bool CHMM::save_path_derivatives(FILE* logfile)
{
	// Matlab-style text dump of the derivatives of the Viterbi path
	// probability P[O,Q|lambda] w.r.t. every parameter; one row per
	// observation vector.  Returns false when logfile==NULL.
	int32_t dim,i,j;

	if (logfile)
	{
		// header explaining the row layout
		fprintf(logfile,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%% \n%% calculating derivatives of P[O,Q|lambda]=p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(logfile,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(logfile,"%% ............................. \n");
		fprintf(logfile,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(logfile,"%% ];\n%%\n\ndPr(log()) = [\n");
	}
	else
		return false ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// fills the Viterbi path used by the path_derivative_* calls
		best_path(dim);

		fprintf(logfile, "[ ");

		// derivatives w.r.t. start and end distributions
		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_p(i,dim) );

		for (i=0; i<N; i++)
			fprintf(logfile,"%e, ", (double) path_derivative_q(i,dim) );

		// derivatives w.r.t. transition and emission matrices
		for (i=0; i<N; i++)
			for (j=0; j<N; j++)
				fprintf(logfile, "%e,", (double) path_derivative_a(i,j,dim) );

		for (i=0; i<N; i++)
			for (j=0; j<M; j++)
				fprintf(logfile, "%e,", (double) path_derivative_b(i,j,dim) );

		// step back over the trailing comma before closing the row
		fseek(logfile,ftell(logfile)-1,SEEK_SET);
		fprintf(logfile, " ];\n");
	}

	fprintf(logfile, "];");

	return true ;
}
04248
bool CHMM::save_path_derivatives_bin(FILE* logfile)
{
	// Binary variant of save_path_derivatives: per observation vector
	// writes the Viterbi probability followed by the path derivatives --
	// all parameters when no model definition is loaded, otherwise only
	// the learnable subset.  Finishes by appending the model itself.
	bool result=false;
	int32_t dim,i,j,q;
	float64_t prob=0 ;
	int32_t num_floats=0 ;	// incremented by FLOATWRITE

	float64_t sum_prob=0.0 ;	// NOTE(review): accumulated but never read
	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator every 100 vectors
		if (dim%100==0)
		{
			SG_PRINT( ".") ;
		} ;

		prob=best_path(dim);
		sum_prob+=prob ;

		if (!model)
		{
			// no definition: all parameters
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(logfile, path_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(logfile, path_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(logfile, path_derivative_b(i,j,dim));

			}
		}
		else
		{
			// only the learnable subset listed in the model definition
			// (lists are terminated by a negative index)
			if (logfile)
			{
				FLOATWRITE(logfile, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(logfile, path_derivative_q(model->get_learn_q(i),dim));

				for (q=0; model->get_learn_a(q,0)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(logfile, path_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(logfile, path_derivative_b(i,j,dim));
				} ;
			}
		} ;
	}
	// append the current model parameters so the dump is self-contained
	save_model_bin(logfile) ;

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04331
bool CHMM::save_model_derivatives_bin(FILE* file)
{
	// Like save_path_derivatives_bin, but writes derivatives of the full
	// model probability P[O|lambda] instead of the Viterbi path
	// probability.  Finishes by appending the model itself.
	bool result=false;
	int32_t dim,i,j,q ;
	int32_t num_floats=0 ;	// incremented by FLOATWRITE

	if (!model)
		SG_WARNING( "No definitions loaded -- writing derivatives of all weights\n") ;
	else
		SG_INFO( "writing derivatives of changed weights only\n") ;

#ifdef USE_HMMPARALLEL
	// compute model probabilities for the next num_threads vectors in
	// parallel; model_probability(dim) below then reuses the results
	int32_t num_threads = parallel->get_num_threads();
	pthread_t *threads=SG_MALLOC(pthread_t, num_threads);
	S_DIM_THREAD_PARAM *params=SG_MALLOC(S_DIM_THREAD_PARAM, num_threads);

	if (p_observations->get_num_vectors()<num_threads)
		num_threads=p_observations->get_num_vectors();
#endif

	for (dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		// progress indicator every 20 vectors
		if (dim%20==0)
		{
			SG_PRINT( ".") ;
		} ;

#ifdef USE_HMMPARALLEL
		// launch one prefetch thread per upcoming vector, then join all
		if (dim%num_threads==0)
		{
			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
				{
					params[i].hmm=this ;
					params[i].dim=dim+i ;
					pthread_create(&threads[i], NULL, bw_dim_prefetch, (void*)&params[i]) ;
				}
			}

			for (i=0; i<num_threads; i++)
			{
				if (dim+i<p_observations->get_num_vectors())
					pthread_join(threads[i], NULL);
			}
		}
#endif

		float64_t prob=model_probability(dim) ;
		if (!model)
		{
			// no definition: all parameters
			if (file)
			{
				FLOATWRITE(file, prob);

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_p(i,dim));

				for (i=0; i<N; i++)
					FLOATWRITE(file, model_derivative_q(i,dim));

				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						FLOATWRITE(file, model_derivative_a(i,j,dim));

				for (i=0; i<N; i++)
					for (j=0; j<M; j++)
						FLOATWRITE(file, model_derivative_b(i,j,dim));

				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		}
		else
		{
			// only the learnable subset listed in the model definition
			if (file)
			{
				FLOATWRITE(file, prob);

				for (i=0; model->get_learn_p(i)>=0; i++)
					FLOATWRITE(file, model_derivative_p(model->get_learn_p(i),dim));

				for (i=0; model->get_learn_q(i)>=0; i++)
					FLOATWRITE(file, model_derivative_q(model->get_learn_q(i),dim));

				// NOTE(review): terminates on column 1 while the b-loop
				// (and save_path_derivatives_bin's a-loop) tests column 0
				// -- verify both columns end with -1 together.
				for (q=0; model->get_learn_a(q,1)>=0; q++)
				{
					i=model->get_learn_a(q,0) ;
					j=model->get_learn_a(q,1) ;
					FLOATWRITE(file, model_derivative_a(i,j,dim));
				} ;

				for (q=0; model->get_learn_b(q,0)>=0; q++)
				{
					i=model->get_learn_b(q,0) ;
					j=model->get_learn_b(q,1) ;
					FLOATWRITE(file, model_derivative_b(i,j,dim));
				} ;
				if (dim==0)
					SG_INFO("Number of parameters (including posterior prob.): %i\n", num_floats) ;
			} ;
		} ;
	}
	// append the current model parameters so the dump is self-contained
	save_model_bin(file) ;

#ifdef USE_HMMPARALLEL
	SG_FREE(threads);
	SG_FREE(params);
#endif

	result=true;
	SG_PRINT( "\n") ;
	return result;
}
04452
bool CHMM::save_model_derivatives(FILE* file)
{
	// Matlab-style text dump of the derivatives of the full model
	// probability P[O|lambda] w.r.t. every parameter; one row per
	// observation vector.  Returns false when file==NULL.
	bool result=false;
	int32_t dim,i,j;

	if (file)
	{
		// header explaining the row layout
		fprintf(file,"%% lambda denotes the model\n%% O denotes the observation sequence\n%% Q denotes the path\n%%\n%% calculating derivatives of P[O|lambda]=sum_{all Q}p_{Q1}b_{Q1}(O_1}*a_{Q1}{Q2}b_{Q2}(O2)*...*q_{T-1}{T}b_{QT}(O_T}q_{Q_T} against p,q,a,b\n%%\n");
		fprintf(file,"%% dPr[...]=[ [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_NN]\n");
		fprintf(file,"%% ............................. \n");
		fprintf(file,"%% [dp_1,...,dp_N,dq_1,...,dq_N, da_11,da_12,..,da_1N,..,da_NN, db_11,.., db_MM]\n");
		fprintf(file,"%% ];\n%%\n\nlog(dPr) = [\n");

		for (dim=0; dim<p_observations->get_num_vectors(); dim++)
		{
			fprintf(file, "[ ");

			// derivatives w.r.t. start and end distributions
			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_p(i, dim) );

			for (i=0; i<N; i++)
				fprintf(file,"%e, ", (double) model_derivative_q(i, dim) );

			// derivatives w.r.t. transition and emission matrices
			for (i=0; i<N; i++)
				for (j=0; j<N; j++)
					fprintf(file, "%e,", (double) model_derivative_a(i,j,dim) );

			for (i=0; i<N; i++)
				for (j=0; j<M; j++)
					fprintf(file, "%e,", (double) model_derivative_b(i,j,dim) );

			// step back over the trailing comma before closing the row
			fseek(file,ftell(file)-1,SEEK_SET);
			fprintf(file, " ];\n");
		}

		fprintf(file, "];");

		result=true;
	}
	return result;
}
04500
bool CHMM::check_model_derivatives_combined()
{
	// Debug utility: numerically verifies the analytic emission
	// derivatives, summed over ALL observation vectors, via central
	// differences in the linear domain.
	const float64_t delta=5e-4 ;

	int32_t i ;

	// NOTE(review): only state i=0 is checked; a loop over states seems
	// to have been removed, leaving this single iteration block.
	i=0;
	{
		for (int32_t j=0; j<M; j++)
		{
			float64_t old_b=get_b(i,j) ;

			// perturb b(i,j) by -delta in the linear domain
			set_b(i,j, log(exp(old_b)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=(model_probability(-1)*p_observations->get_num_vectors()) ;

			// ... and by +delta
			set_b(i,j, log(exp(old_b)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=(model_probability(-1)*p_observations->get_num_vectors());

			// central-difference estimate
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			// restore the original parameter
			set_b(i,j, old_b) ;
			invalidate_model() ;

			// analytic derivative accumulated over all vectors
			float64_t deriv_calc=0 ;
			for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
			{
				deriv_calc+=exp(model_derivative_b(i, j, dim)-model_probability(dim)) ;
				if (j==1)
					SG_INFO("deriv_calc[%i]=%e\n",dim,deriv_calc) ;
			} ;

			// NOTE(review): SG_ERROR is used here as a progress report;
			// depending on the installed error handler this may abort
			// after the first comparison -- confirm intended severity.
			SG_ERROR( "b(%i,%i)=%e db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j,exp(old_b),i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	} ;
	return true ;
}
04570
bool CHMM::check_model_derivatives()
{
	// Debug utility: per observation vector, compares the analytic
	// derivatives of P[O|lambda] against central-difference estimates
	// (perturbation by +/-delta in the linear domain) and logs the
	// relative error.
	// NOTE(review): 'result' is never set to true, so this always
	// returns false -- callers should not rely on the return value.
	bool result=false;
	const float64_t delta=3e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				// central-difference estimate
				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic value
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_a(i, j, dim)) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
				invalidate_model() ;
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=exp(model_probability(dim)) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=exp(model_probability(dim));

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=exp(model_derivative_b(i, j, dim));

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

#ifdef TEST
		// start distribution p (compiled only when TEST is defined)
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_p(i, dim));

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q (compiled only when TEST is defined)
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=exp(model_probability(dim)) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=exp(model_probability(dim));

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=exp(model_derivative_q(i, dim));

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
#endif
	}
	return result;
}
04674
#ifdef USE_HMMDEBUG
bool CHMM::check_path_derivatives()
{
	// Debug utility: same numeric-vs-analytic comparison as
	// check_model_derivatives, but for the Viterbi path probability
	// (best_path) and its path_derivative_* counterparts.
	// NOTE(review): 'result' is never set to true -- always returns false.
	bool result=false;
	const float64_t delta=1e-4 ;

	for (int32_t dim=0; dim<p_observations->get_num_vectors(); dim++)
	{
		int32_t i ;

		// transition matrix a
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<N; j++)
			{
				float64_t old_a=get_a(i,j) ;

				// central difference with +/-delta in the linear domain
				set_a(i,j, log(exp(old_a)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_a(i,j, log(exp(old_a)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				// restore and compare against the analytic value
				set_a(i,j, old_a) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_a(i, j, dim) ;

				SG_DEBUG( "da(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
			} ;
		} ;
		// emission matrix b
		for (i=0; i<N; i++)
		{
			for (int32_t j=0; j<M; j++)
			{
				float64_t old_b=get_b(i,j) ;

				set_b(i,j, log(exp(old_b)-delta)) ;
				invalidate_model() ;
				float64_t prob_old=best_path(dim) ;

				set_b(i,j, log(exp(old_b)+delta)) ;
				invalidate_model() ;
				float64_t prob_new=best_path(dim);

				float64_t deriv = (prob_new-prob_old)/(2*delta) ;

				set_b(i,j, old_b) ;
				invalidate_model() ;
				float64_t deriv_calc=path_derivative_b(i, j, dim);

				SG_DEBUG( "db(%i,%i) = %e:%e\t (%1.5f%%)\n", i,j, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/(deriv_calc));
			} ;
		} ;

		// start distribution p
		for (i=0; i<N; i++)
		{
			float64_t old_p=get_p(i) ;

			set_p(i, log(exp(old_p)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_p(i, log(exp(old_p)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);
			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_p(i, old_p) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_p(i, dim);

			SG_DEBUG( "dp(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
		// end distribution q
		for (i=0; i<N; i++)
		{
			float64_t old_q=get_q(i) ;

			set_q(i, log(exp(old_q)-delta)) ;
			invalidate_model() ;
			float64_t prob_old=best_path(dim) ;

			set_q(i, log(exp(old_q)+delta)) ;
			invalidate_model() ;
			float64_t prob_new=best_path(dim);

			float64_t deriv = (prob_new-prob_old)/(2*delta) ;

			set_q(i, old_q) ;
			invalidate_model() ;
			float64_t deriv_calc=path_derivative_q(i, dim);

			SG_DEBUG( "dq(%i) = %e:%e\t (%1.5f%%)\n", i, deriv_calc, deriv, 100.0*(deriv-deriv_calc)/deriv_calc);
		} ;
	}
	return result;
}
#endif // USE_HMMDEBUG
04777
04778
04779 void CHMM::normalize(bool keep_dead_states)
04780 {
04781 int32_t i,j;
04782 const float64_t INF=-1e10;
04783 float64_t sum_p =INF;
04784
04785 for (i=0; i<N; i++)
04786 {
04787 sum_p=CMath::logarithmic_sum(sum_p, get_p(i));
04788
04789 float64_t sum_b =INF;
04790 float64_t sum_a =get_q(i);
04791
04792 for (j=0; j<N; j++)
04793 sum_a=CMath::logarithmic_sum(sum_a, get_a(i,j));
04794
04795 if (sum_a>CMath::ALMOST_NEG_INFTY/N || (!keep_dead_states) )
04796 {
04797 for (j=0; j<N; j++)
04798 set_a(i,j, get_a(i,j)-sum_a);
04799 set_q(i, get_q(i)-sum_a);
04800 }
04801
04802 for (j=0; j<M; j++)
04803 sum_b=CMath::logarithmic_sum(sum_b, get_b(i,j));
04804 for (j=0; j<M; j++)
04805 set_b(i,j, get_b(i,j)-sum_b);
04806 }
04807
04808 for (i=0; i<N; i++)
04809 set_p(i, get_p(i)-sum_p);
04810
04811 invalidate_model();
04812 }
04813
bool CHMM::append_model(CHMM* app_model)
{
	// Concatenates app_model's states behind this model's: the end-state
	// probabilities of 'this' are folded into transitions to app_model's
	// start states.  Requires identical observation alphabets (same M).
	// NOTE(review): 'result' is never set to true -- this returns false
	// even on success; callers should not rely on the return value.
	bool result=false;
	const int32_t num_states=app_model->get_N();
	int32_t i,j;

	SG_DEBUG( "cur N:%d M:%d\n", N, M);
	SG_DEBUG( "old N:%d M:%d\n", app_model->get_N(), app_model->get_M());
	if (app_model->get_M() == get_M())
	{
		// enlarged parameter arrays, initialized to log(0)
		float64_t* n_p=SG_MALLOC(float64_t, N+num_states);
		float64_t* n_q=SG_MALLOC(float64_t, N+num_states);
		float64_t* n_a=SG_MALLOC(float64_t, (N+num_states)*(N+num_states));
		float64_t* n_b=SG_MALLOC(float64_t, (N+num_states)*M);

		for (i=0; i<N+num_states; i++)
		{
			n_p[i]=-CMath::INFTY;
			n_q[i]=-CMath::INFTY;

			for (j=0; j<N+num_states; j++)
				n_a[(N+num_states)*i+j]=-CMath::INFTY;

			for (j=0; j<M; j++)
				n_b[M*i+j]=-CMath::INFTY;
		}

		// copy this model's parameters; only its p survives (q is folded
		// into the connecting transitions below)
		// NOTE(review): a-entries are written as n_a[cols*j+i] here while
		// the init loop above indexed n_a[cols*i+j] -- presumably the
		// transition matrix is stored column-major; verify against get_a.
		for (i=0; i<N; i++)
		{
			n_p[i]=get_p(i);

			for (j=0; j<N; j++)
				n_a[(N+num_states)*j+i]=get_a(i,j);

			for (j=0; j<M; j++)
			{
				n_b[M*i+j]=get_b(i,j);
			}
		}

		// copy the appended model's parameters shifted by N; only its q
		// survives
		for (i=0; i<app_model->get_N(); i++)
		{
			n_q[i+N]=app_model->get_q(i);

			for (j=0; j<app_model->get_N(); j++)
				n_a[(N+num_states)*(j+N)+(i+N)]=app_model->get_a(i,j);
			for (j=0; j<app_model->get_M(); j++)
				n_b[M*(i+N)+j]=app_model->get_b(i,j);
		}

		// connect the models: old end-state prob times appended
		// start-state prob becomes a transition (log-domain sum)
		for (i=0; i<N; i++)
		{
			for (j=N; j<N+num_states; j++)
				n_a[(N+num_states)*j + i]=CMath::logarithmic_sum(get_q(i)+app_model->get_p(j-N), n_a[(N+num_states)*j + i]);
		}

		// grow the per-state work arrays to the new N
		free_state_dependend_arrays();
		N+=num_states;

		alloc_state_dependend_arrays();

		// swap in the enlarged parameter arrays
		SG_FREE(initial_state_distribution_p);
		SG_FREE(end_state_distribution_q);
		SG_FREE(transition_matrix_a);
		SG_FREE(observation_matrix_b);

		transition_matrix_a=n_a;
		observation_matrix_b=n_b;
		initial_state_distribution_p=n_p;
		end_state_distribution_q=n_q;

		SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
		invalidate_model();
	}
	else
		SG_ERROR( "number of observations is different for append model, doing nothing!\n");

	return result;
}
04905
bool CHMM::append_model(CHMM* app_model, float64_t* cur_out, float64_t* app_out)
{
	// Like append_model(CHMM*), but inserts two extra connector states:
	// state N emits cur_out, state N+1 emits app_out, and all traffic
	// from this model's end states is routed N -> N+1 -> app_model's
	// start states.  cur_out/app_out must hold M log-emission values.
	// NOTE(review): 'result' is never set to true -- always returns false.
	bool result=false;
	const int32_t num_states=app_model->get_N()+2;
	int32_t i,j;

	if (app_model->get_M() == get_M())
	{
		// enlarged parameter arrays, initialized to log(0)
		float64_t* n_p=SG_MALLOC(float64_t, N+num_states);
		float64_t* n_q=SG_MALLOC(float64_t, N+num_states);
		float64_t* n_a=SG_MALLOC(float64_t, (N+num_states)*(N+num_states));
		float64_t* n_b=SG_MALLOC(float64_t, (N+num_states)*M);

		for (i=0; i<N+num_states; i++)
		{
			n_p[i]=-CMath::INFTY;
			n_q[i]=-CMath::INFTY;

			for (j=0; j<N+num_states; j++)
				n_a[(N+num_states)*j+i]=-CMath::INFTY;

			for (j=0; j<M; j++)
				n_b[M*i+j]=-CMath::INFTY;
		}

		// copy this model's parameters (p survives, q is rerouted below)
		for (i=0; i<N; i++)
		{
			n_p[i]=get_p(i);

			for (j=0; j<N; j++)
				n_a[(N+num_states)*j+i]=get_a(i,j);

			for (j=0; j<M; j++)
			{
				n_b[M*i+j]=get_b(i,j);
			}
		}

		// copy the appended model's parameters shifted by N+2 (q survives)
		for (i=0; i<app_model->get_N(); i++)
		{
			n_q[i+N+2]=app_model->get_q(i);

			for (j=0; j<app_model->get_N(); j++)
				n_a[(N+num_states)*(j+N+2)+(i+N+2)]=app_model->get_a(i,j);
			for (j=0; j<app_model->get_M(); j++)
				n_b[M*(i+N+2)+j]=app_model->get_b(i,j);
		}

		// emission distributions of the two connector states
		for (i=0; i<M; i++)
		{
			n_b[M*N+i]=cur_out[i];
			n_b[M*(N+1)+i]=app_out[i];
		}

		// wire the connector states into the transition matrix
		for (i=0; i<N+num_states; i++)
		{
			// certain transition (log(1)=0) from connector N to N+1
			if (i==N+1)
				n_a[(N+num_states)*i + N]=0;

			// old end states feed connector state N with their q
			if (i<N)
				n_a[(N+num_states)*N+i]=get_q(i);

			// connector N+1 feeds app_model's start states with its p
			if (i>=N+2)
				n_a[(N+num_states)*i+(N+1)]=app_model->get_p(i-(N+2));
		}

		// grow the per-state work arrays to the new N
		free_state_dependend_arrays();
		N+=num_states;

		alloc_state_dependend_arrays();

		// swap in the enlarged parameter arrays
		SG_FREE(initial_state_distribution_p);
		SG_FREE(end_state_distribution_q);
		SG_FREE(transition_matrix_a);
		SG_FREE(observation_matrix_b);

		transition_matrix_a=n_a;
		observation_matrix_b=n_b;
		initial_state_distribution_p=n_p;
		end_state_distribution_q=n_q;

		SG_WARNING( "not normalizing anymore, call normalize_hmm to make sure the hmm is valid!!\n");
		invalidate_model();
	}

	return result;
}
05012
05013
void CHMM::add_states(int32_t num_states, float64_t default_value)
{
	// Appends num_states fresh states and renormalizes.  New parameters
	// are set to log(default_value), or -- when default_value is 0 -- to
	// the log of a random value in [MIN_RAND, MAX_RAND] (see VAL_MACRO,
	// which reads these three locals by name).
	int32_t i,j;
	const float64_t MIN_RAND=1e-2;	// used by VAL_MACRO
	const float64_t MAX_RAND=2e-1;	// used by VAL_MACRO

	// enlarged parameter arrays
	float64_t* n_p=SG_MALLOC(float64_t, N+num_states);
	float64_t* n_q=SG_MALLOC(float64_t, N+num_states);
	float64_t* n_a=SG_MALLOC(float64_t, (N+num_states)*(N+num_states));
	float64_t* n_b=SG_MALLOC(float64_t, (N+num_states)*M);

	// copy the existing parameters
	// NOTE(review): a-entries are written as n_a[cols*j+i] (transposed
	// index order), consistent with append_model -- verify against get_a.
	for (i=0; i<N; i++)
	{
		n_p[i]=get_p(i);
		n_q[i]=get_q(i);

		for (j=0; j<N; j++)
			n_a[(N+num_states)*j+i]=get_a(i,j);

		for (j=0; j<M; j++)
			n_b[M*i+j]=get_b(i,j);
	}

	// initialize the new states -- p, q, emissions, and transitions in
	// both directions -- with the default/random value
	for (i=N; i<N+num_states; i++)
	{
		n_p[i]=VAL_MACRO;
		n_q[i]=VAL_MACRO;

		for (j=0; j<N; j++)
			n_a[(N+num_states)*i+j]=VAL_MACRO;

		for (j=0; j<N+num_states; j++)
			n_a[(N+num_states)*j+i]=VAL_MACRO;

		for (j=0; j<M; j++)
			n_b[M*i+j]=VAL_MACRO;
	}
	// grow the per-state work arrays to the new N
	free_state_dependend_arrays();
	N+=num_states;

	alloc_state_dependend_arrays();

	// swap in the enlarged parameter arrays
	SG_FREE(initial_state_distribution_p);
	SG_FREE(end_state_distribution_q);
	SG_FREE(transition_matrix_a);
	SG_FREE(observation_matrix_b);

	transition_matrix_a=n_a;
	observation_matrix_b=n_b;
	initial_state_distribution_p=n_p;
	end_state_distribution_q=n_q;

	invalidate_model();
	normalize();
}
05073
05074 void CHMM::chop(float64_t value)
05075 {
05076 for (int32_t i=0; i<N; i++)
05077 {
05078 int32_t j;
05079
05080 if (exp(get_p(i)) < value)
05081 set_p(i, CMath::ALMOST_NEG_INFTY);
05082
05083 if (exp(get_q(i)) < value)
05084 set_q(i, CMath::ALMOST_NEG_INFTY);
05085
05086 for (j=0; j<N; j++)
05087 {
05088 if (exp(get_a(i,j)) < value)
05089 set_a(i,j, CMath::ALMOST_NEG_INFTY);
05090 }
05091
05092 for (j=0; j<M; j++)
05093 {
05094 if (exp(get_b(i,j)) < value)
05095 set_b(i,j, CMath::ALMOST_NEG_INFTY);
05096 }
05097 }
05098 normalize();
05099 invalidate_model();
05100 }
05101
05102 bool CHMM::linear_train(bool right_align)
05103 {
05104 if (p_observations)
05105 {
05106 int32_t histsize=(get_M()*get_N());
05107 int32_t* hist=SG_MALLOC(int32_t, histsize);
05108 int32_t* startendhist=SG_MALLOC(int32_t, get_N());
05109 int32_t i,dim;
05110
05111 ASSERT(p_observations->get_max_vector_length()<=get_N());
05112
05113 for (i=0; i<histsize; i++)
05114 hist[i]=0;
05115
05116 for (i=0; i<get_N(); i++)
05117 startendhist[i]=0;
05118
05119 if (right_align)
05120 {
05121 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05122 {
05123 int32_t len=0;
05124 bool free_vec;
05125 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05126
05127 ASSERT(len<=get_N());
05128 startendhist[(get_N()-len)]++;
05129
05130 for (i=0;i<len;i++)
05131 hist[(get_N()-len+i)*get_M() + *obs++]++;
05132
05133 p_observations->free_feature_vector(obs, dim, free_vec);
05134 }
05135
05136 set_q(get_N()-1, 1);
05137 for (i=0; i<get_N()-1; i++)
05138 set_q(i, 0);
05139
05140 for (i=0; i<get_N(); i++)
05141 set_p(i, startendhist[i]+PSEUDO);
05142
05143 for (i=0;i<get_N();i++)
05144 {
05145 for (int32_t j=0; j<get_N(); j++)
05146 {
05147 if (i==j-1)
05148 set_a(i,j, 1);
05149 else
05150 set_a(i,j, 0);
05151 }
05152 }
05153 }
05154 else
05155 {
05156 for (dim=0; dim<p_observations->get_num_vectors(); dim++)
05157 {
05158 int32_t len=0;
05159 bool free_vec;
05160 uint16_t* obs=p_observations->get_feature_vector(dim, len, free_vec);
05161
05162 ASSERT(len<=get_N());
05163 for (i=0;i<len;i++)
05164 hist[i*get_M() + *obs++]++;
05165
05166 startendhist[len-1]++;
05167
05168 p_observations->free_feature_vector(obs, dim, free_vec);
05169 }
05170
05171 set_p(0, 1);
05172 for (i=1; i<get_N(); i++)
05173 set_p(i, 0);
05174
05175 for (i=0; i<get_N(); i++)
05176 set_q(i, startendhist[i]+PSEUDO);
05177
05178 int32_t total=p_observations->get_num_vectors();
05179
05180 for (i=0;i<get_N();i++)
05181 {
05182 total-= startendhist[i] ;
05183
05184 for (int32_t j=0; j<get_N(); j++)
05185 {
05186 if (i==j-1)
05187 set_a(i,j, total+PSEUDO);
05188 else
05189 set_a(i,j, 0);
05190 }
05191 }
05192 ASSERT(total==0);
05193 }
05194
05195 for (i=0;i<get_N();i++)
05196 {
05197 for (int32_t j=0; j<get_M(); j++)
05198 {
05199 float64_t sum=0;
05200 int32_t offs=i*get_M()+ p_observations->get_masked_symbols((uint16_t) j, (uint8_t) 254);
05201
05202 for (int32_t k=0; k<p_observations->get_original_num_symbols(); k++)
05203 sum+=hist[offs+k];
05204
05205 set_b(i,j, (PSEUDO+hist[i*get_M()+j])/(sum+PSEUDO*p_observations->get_original_num_symbols()));
05206 }
05207 }
05208
05209 SG_FREE(hist);
05210 SG_FREE(startendhist);
05211 convert_to_log();
05212 invalidate_model();
05213 return true;
05214 }
05215 else
05216 return false;
05217 }
05218
05219 void CHMM::set_observation_nocache(CStringFeatures<uint16_t>* obs)
05220 {
05221 ASSERT(obs);
05222 p_observations=obs;
05223 SG_REF(obs);
05224
05225 if (obs)
05226 if (obs->get_num_symbols() > M)
05227 SG_ERROR( "number of symbols in observation (%ld) larger than M (%d)\n", (long) obs->get_num_symbols(), M);
05228
05229 if (!reused_caches)
05230 {
05231 #ifdef USE_HMMPARALLEL_STRUCTURES
05232 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05233 {
05234 SG_FREE(alpha_cache[i].table);
05235 SG_FREE(beta_cache[i].table);
05236 SG_FREE(states_per_observation_psi[i]);
05237 SG_FREE(path[i]);
05238
05239 alpha_cache[i].table=NULL;
05240 beta_cache[i].table=NULL;
05241 states_per_observation_psi[i]=NULL;
05242 path[i]=NULL;
05243 } ;
05244 #else
05245 SG_FREE(alpha_cache.table);
05246 SG_FREE(beta_cache.table);
05247 SG_FREE(states_per_observation_psi);
05248 SG_FREE(path);
05249
05250 alpha_cache.table=NULL;
05251 beta_cache.table=NULL;
05252 states_per_observation_psi=NULL;
05253 path=NULL;
05254
05255 #endif //USE_HMMPARALLEL_STRUCTURES
05256 }
05257
05258 invalidate_model();
05259 }
05260
05261 void CHMM::set_observations(CStringFeatures<uint16_t>* obs, CHMM* lambda)
05262 {
05263 ASSERT(obs);
05264 SG_REF(obs);
05265 p_observations=obs;
05266
05267
05268
05269
05270 SG_REF(obs);
05271 features=obs;
05272
05273 SG_DEBUG("num symbols alphabet: %ld\n", obs->get_alphabet()->get_num_symbols());
05274 SG_DEBUG("num symbols: %ld\n", obs->get_num_symbols());
05275 SG_DEBUG("M: %d\n", M);
05276
05277 if (obs)
05278 {
05279 if (obs->get_num_symbols() > M)
05280 {
05281 SG_ERROR( "number of symbols in observation (%ld) larger than M (%d)\n", (long) obs->get_num_symbols(), M);
05282 }
05283 }
05284
05285 if (!reused_caches)
05286 {
05287 #ifdef USE_HMMPARALLEL_STRUCTURES
05288 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05289 {
05290 SG_FREE(alpha_cache[i].table);
05291 SG_FREE(beta_cache[i].table);
05292 SG_FREE(states_per_observation_psi[i]);
05293 SG_FREE(path[i]);
05294
05295 alpha_cache[i].table=NULL;
05296 beta_cache[i].table=NULL;
05297 states_per_observation_psi[i]=NULL;
05298 path[i]=NULL;
05299 } ;
05300 #else
05301 SG_FREE(alpha_cache.table);
05302 SG_FREE(beta_cache.table);
05303 SG_FREE(states_per_observation_psi);
05304 SG_FREE(path);
05305
05306 alpha_cache.table=NULL;
05307 beta_cache.table=NULL;
05308 states_per_observation_psi=NULL;
05309 path=NULL;
05310
05311 #endif //USE_HMMPARALLEL_STRUCTURES
05312 }
05313
05314 if (obs!=NULL)
05315 {
05316 int32_t max_T=obs->get_max_vector_length();
05317
05318 if (lambda)
05319 {
05320 #ifdef USE_HMMPARALLEL_STRUCTURES
05321 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05322 {
05323 this->alpha_cache[i].table= lambda->alpha_cache[i].table;
05324 this->beta_cache[i].table= lambda->beta_cache[i].table;
05325 this->states_per_observation_psi[i]=lambda->states_per_observation_psi[i] ;
05326 this->path[i]=lambda->path[i];
05327 } ;
05328 #else
05329 this->alpha_cache.table= lambda->alpha_cache.table;
05330 this->beta_cache.table= lambda->beta_cache.table;
05331 this->states_per_observation_psi= lambda->states_per_observation_psi;
05332 this->path=lambda->path;
05333 #endif //USE_HMMPARALLEL_STRUCTURES
05334
05335 this->reused_caches=true;
05336 }
05337 else
05338 {
05339 this->reused_caches=false;
05340 #ifdef USE_HMMPARALLEL_STRUCTURES
05341 SG_INFO( "allocating mem for path-table of size %.2f Megabytes (%d*%d) each:\n", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05342 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05343 {
05344 if ((states_per_observation_psi[i]=SG_MALLOC(T_STATES,max_T*N))!=NULL)
05345 SG_DEBUG( "path_table[%i] successfully allocated\n",i) ;
05346 else
05347 SG_ERROR( "failed allocating memory for path_table[%i].\n",i) ;
05348 path[i]=SG_MALLOC(T_STATES, max_T);
05349 }
05350 #else // no USE_HMMPARALLEL_STRUCTURES
05351 SG_INFO( "allocating mem of size %.2f Megabytes (%d*%d) for path-table ....", ((float32_t)max_T)*N*sizeof(T_STATES)/(1024*1024), max_T, N);
05352 if ((states_per_observation_psi=SG_MALLOC(T_STATES,max_T*N)) != NULL)
05353 SG_DONE();
05354 else
05355 SG_ERROR( "failed.\n") ;
05356
05357 path=SG_MALLOC(T_STATES, max_T);
05358 #endif // USE_HMMPARALLEL_STRUCTURES
05359 #ifdef USE_HMMCACHE
05360 SG_INFO( "allocating mem for caches each of size %.2f Megabytes (%d*%d) ....\n", ((float32_t)max_T)*N*sizeof(T_ALPHA_BETA_TABLE)/(1024*1024), max_T, N);
05361
05362 #ifdef USE_HMMPARALLEL_STRUCTURES
05363 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05364 {
05365 if ((alpha_cache[i].table=SG_MALLOC(T_ALPHA_BETA_TABLE, max_T*N))!=NULL)
05366 SG_DEBUG( "alpha_cache[%i].table successfully allocated\n",i) ;
05367 else
05368 SG_ERROR("allocation of alpha_cache[%i].table failed\n",i) ;
05369
05370 if ((beta_cache[i].table=SG_MALLOC(T_ALPHA_BETA_TABLE, max_T*N)) != NULL)
05371 SG_DEBUG("beta_cache[%i].table successfully allocated\n",i) ;
05372 else
05373 SG_ERROR("allocation of beta_cache[%i].table failed\n",i) ;
05374 } ;
05375 #else // USE_HMMPARALLEL_STRUCTURES
05376 if ((alpha_cache.table=SG_MALLOC(T_ALPHA_BETA_TABLE, max_T*N)) != NULL)
05377 SG_DEBUG( "alpha_cache.table successfully allocated\n") ;
05378 else
05379 SG_ERROR( "allocation of alpha_cache.table failed\n") ;
05380
05381 if ((beta_cache.table=SG_MALLOC(T_ALPHA_BETA_TABLE, max_T*N)) != NULL)
05382 SG_DEBUG( "beta_cache.table successfully allocated\n") ;
05383 else
05384 SG_ERROR( "allocation of beta_cache.table failed\n") ;
05385
05386 #endif // USE_HMMPARALLEL_STRUCTURES
05387 #else // USE_HMMCACHE
05388 #ifdef USE_HMMPARALLEL_STRUCTURES
05389 for (int32_t i=0; i<parallel->get_num_threads(); i++)
05390 {
05391 alpha_cache[i].table=NULL ;
05392 beta_cache[i].table=NULL ;
05393 } ;
05394 #else //USE_HMMPARALLEL_STRUCTURES
05395 alpha_cache.table=NULL ;
05396 beta_cache.table=NULL ;
05397 #endif //USE_HMMPARALLEL_STRUCTURES
05398 #endif //USE_HMMCACHE
05399 }
05400 }
05401
05402
05403 invalidate_model();
05404 }
05405
05406 bool CHMM::permutation_entropy(int32_t window_width, int32_t sequence_number)
05407 {
05408 if (p_observations && window_width>0 &&
05409 ( sequence_number<0 || sequence_number < p_observations->get_num_vectors()))
05410 {
05411 int32_t min_sequence=sequence_number;
05412 int32_t max_sequence=sequence_number;
05413
05414 if (sequence_number<0)
05415 {
05416 min_sequence=0;
05417 max_sequence=p_observations->get_num_vectors();
05418 SG_INFO( "numseq: %d\n", max_sequence);
05419 }
05420
05421 SG_INFO( "min_sequence: %d max_sequence: %d\n", min_sequence, max_sequence);
05422 for (sequence_number=min_sequence; sequence_number<max_sequence; sequence_number++)
05423 {
05424 int32_t sequence_length=0;
05425 bool free_vec;
05426 uint16_t* obs=p_observations->get_feature_vector(sequence_number, sequence_length, free_vec);
05427
05428 int32_t histsize=get_M();
05429 int64_t* hist=SG_MALLOC(int64_t, histsize);
05430 int32_t i,j;
05431
05432 for (i=0; i<sequence_length-window_width; i++)
05433 {
05434 for (j=0; j<histsize; j++)
05435 hist[j]=0;
05436
05437 uint16_t* ptr=&obs[i];
05438 for (j=0; j<window_width; j++)
05439 {
05440 hist[*ptr++]++;
05441 }
05442
05443 float64_t perm_entropy=0;
05444 for (j=0; j<get_M(); j++)
05445 {
05446 float64_t p=
05447 (((float64_t) hist[j])+PSEUDO)/
05448 (window_width+get_M()*PSEUDO);
05449 perm_entropy+=p*log(p);
05450 }
05451
05452 SG_PRINT( "%f\n", perm_entropy);
05453 }
05454 p_observations->free_feature_vector(obs, sequence_number, free_vec);
05455
05456 SG_FREE(hist);
05457 }
05458 return true;
05459 }
05460 else
05461 return false;
05462 }
05463
05464 float64_t CHMM::get_log_derivative(int32_t num_param, int32_t num_example)
05465 {
05466 if (num_param<N)
05467 return model_derivative_p(num_param, num_example);
05468 else if (num_param<2*N)
05469 return model_derivative_q(num_param-N, num_example);
05470 else if (num_param<N*(N+2))
05471 {
05472 int32_t k=num_param-2*N;
05473 int32_t i=k/N;
05474 int32_t j=k%N;
05475 return model_derivative_a(i,j, num_example);
05476 }
05477 else if (num_param<N*(N+2+M))
05478 {
05479 int32_t k=num_param-N*(N+2);
05480 int32_t i=k/M;
05481 int32_t j=k%M;
05482 return model_derivative_b(i,j, num_example);
05483 }
05484
05485 ASSERT(false);
05486 return -1;
05487 }
05488
05489 float64_t CHMM::get_log_model_parameter(int32_t num_param)
05490 {
05491 if (num_param<N)
05492 return get_p(num_param);
05493 else if (num_param<2*N)
05494 return get_q(num_param-N);
05495 else if (num_param<N*(N+2))
05496 return transition_matrix_a[num_param-2*N];
05497 else if (num_param<N*(N+2+M))
05498 return observation_matrix_b[num_param-N*(N+2)];
05499
05500 ASSERT(false);
05501 return -1;
05502 }
05503
05504
05505
05506 bool CHMM::converged(float64_t x, float64_t y)
05507 {
05508 float64_t diff=y-x;
05509 float64_t absdiff=fabs(diff);
05510
05511 SG_INFO( "\n #%03d\tbest result so far: %G (eps: %f)", iteration_count, y, diff);
05512
05513 if (iteration_count--==0 || (absdiff<epsilon && conv_it<=0))
05514 {
05515 iteration_count=iterations;
05516 SG_INFO( "...finished\n");
05517 conv_it=5;
05518 return true;
05519 }
05520 else
05521 {
05522 if (absdiff<epsilon)
05523 conv_it--;
05524 else
05525 conv_it=5;
05526
05527 return false;
05528 }
05529 }
05530
05531 bool CHMM::baum_welch_viterbi_train(BaumWelchViterbiType type)
05532 {
05533 CHMM* estimate=new CHMM(this);
05534 CHMM* working=this;
05535 float64_t prob_max=-CMath::INFTY;
05536 float64_t prob=-CMath::INFTY;
05537 float64_t prob_train=CMath::ALMOST_NEG_INFTY;
05538 iteration_count=iterations;
05539
05540 while (!converged(prob, prob_train) && (!CSignal::cancel_computations()))
05541 {
05542 CMath::swap(working, estimate);
05543 prob=prob_train;
05544
05545 switch (type) {
05546 case BW_NORMAL:
05547 working->estimate_model_baum_welch(estimate); break;
05548 case BW_TRANS:
05549 working->estimate_model_baum_welch_trans(estimate); break;
05550 case BW_DEFINED:
05551 working->estimate_model_baum_welch_defined(estimate); break;
05552 case VIT_NORMAL:
05553 working->estimate_model_viterbi(estimate); break;
05554 case VIT_DEFINED:
05555 working->estimate_model_viterbi_defined(estimate); break;
05556 }
05557 prob_train=estimate->model_probability();
05558
05559 if (prob_max<prob_train)
05560 prob_max=prob_train;
05561 }
05562
05563 if (estimate == this)
05564 {
05565 estimate->copy_model(working);
05566 delete working;
05567 }
05568 else
05569 delete estimate;
05570
05571 return true;
05572 }