SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
GUIHMM.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2008 Soeren Sonnenburg
8  * Written (W) 1999-2008 Gunnar Raetsch
9  * Copyright (C) 1999-2008 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/ui/GUIHMM.h>
13 #include <shogun/ui/SGInterface.h>
14 
15 #include <shogun/lib/config.h>
16 #include <shogun/lib/common.h>
18 #include <shogun/labels/Labels.h>
21 
22 #include <unistd.h>
23 
24 using namespace shogun;
25 
26 CGUIHMM::CGUIHMM(CSGInterface* ui_)
27 : CSGObject(), ui(ui_)
28 {
29  working=NULL;
30 
31  pos=NULL;
32  neg=NULL;
33  test=NULL;
34 
35  PSEUDO=1e-10;
36  M=4;
37 }
38 
40 {
42 }
43 
44 bool CGUIHMM::new_hmm(int32_t n, int32_t m)
45 {
47  working=new CHMM(n, m, NULL, PSEUDO);
48  M=m;
49  return true;
50 }
51 
53 {
54  if (!working)
55  SG_ERROR("Create HMM first.\n")
56 
57  CFeatures* trainfeatures=ui->ui_features->get_train_features();
58  if (!trainfeatures)
59  SG_ERROR("Assign train features first.\n")
60  if (trainfeatures->get_feature_type()!=F_WORD ||
61  trainfeatures->get_feature_class()!=C_STRING)
62  SG_ERROR("Features must be STRING of type WORD.\n")
63 
65  SG_DEBUG("Stringfeatures have %ld orig_symbols %ld symbols %d order %ld max_symbols\n", (int64_t) sf->get_original_num_symbols(), (int64_t) sf->get_num_symbols(), sf->get_order(), (int64_t) sf->get_max_num_symbols())
66 
68 
70 }
71 
72 
74 {
75  if (!working)
76  SG_ERROR("Create HMM first.\n")
77 
78  CFeatures* trainfeatures=ui->ui_features->get_train_features();
79  if (!trainfeatures)
80  SG_ERROR("Assign train features first.\n")
81  if (trainfeatures->get_feature_type()!=F_WORD ||
82  trainfeatures->get_feature_class()!=C_STRING)
83  SG_ERROR("Features must be STRING of type WORD.\n")
84 
86 
88 }
89 
90 
92 {
93  if (!working)
94  SG_ERROR("Create HMM first.\n")
95  if (!working->get_observations())
96  SG_ERROR("Assign observation first.\n")
97 
99 }
100 
102 {
103  if (!working)
104  SG_ERROR("Create HMM first.\n")
105  if (!working->get_observations())
106  SG_ERROR("Assign observation first.\n")
107 
109 }
110 
112 {
113  if (!working)
114  SG_ERROR("Create HMM first.\n")
115  if (!working->get_observations())
116  SG_ERROR("Assign observation first.\n")
117 
119 }
120 
121 bool CGUIHMM::linear_train(char align)
122 {
123  if (!working)
124  SG_ERROR("Create HMM first.\n")
125 
126  CFeatures* trainfeatures=ui->ui_features->get_train_features();
127  if (!trainfeatures)
128  SG_ERROR("Assign train features first.\n")
129  if (trainfeatures->get_feature_type()!=F_WORD ||
130  trainfeatures->get_feature_class()!=C_STRING)
131  SG_ERROR("Features must be STRING of type WORD.\n")
132 
134  ui_features->get_train_features());
135 
136  bool right_align=false;
137  if (align=='r')
138  {
139  SG_INFO("Using alignment to right.\n")
140  right_align=true;
141  }
142  else
143  SG_INFO("Using alignment to left.\n")
144  working->linear_train(right_align);
145 
146  return true;
147 }
148 
150 {
152  ui_features->get_test_features();
153  ASSERT(obs)
154  int32_t num_vec=obs->get_num_vectors();
155 
156  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
157  //CStringFeatures<uint16_t>* old_neg=neg->get_observations();
158 
159  pos->set_observations(obs);
160  neg->set_observations(obs);
161 
162  if (!result)
163  result=new CRegressionLabels(num_vec);
164 
165  for (int32_t i=0; i<num_vec; i++)
166  result->set_label(i, pos->model_probability(i) - neg->model_probability(i));
167 
168  //pos->set_observations(old_pos);
169  //neg->set_observations(old_neg);
170  return result;
171 }
172 
174 {
176  ui_features->get_test_features();
177  ASSERT(obs)
178 
179  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
180  //CStringFeatures<uint16_t>* old_neg=neg->get_observations();
181 
182  pos->set_observations(obs);
183  neg->set_observations(obs);
184 
185  float64_t result=pos->model_probability(idx) - neg->model_probability(idx);
186  //pos->set_observations(old_pos);
187  //neg->set_observations(old_neg);
188  return result;
189 }
190 
192 {
193  ASSERT(working)
194 
196  ui_features->get_test_features();
197  ASSERT(obs)
198  int32_t num_vec=obs->get_num_vectors();
199 
200  //CStringFeatures<uint16_t>* old_pos=working->get_observations();
202 
203  if (!result)
204  result=new CRegressionLabels(num_vec);
205 
206  for (int32_t i=0; i<num_vec; i++)
207  result->set_label(i, working->model_probability(i));
208 
209  //working->set_observations(old_pos);
210  return result;
211 }
212 
214 {
215  ASSERT(working)
216 
218  ui_features->get_test_features();
219  ASSERT(obs)
220  int32_t num_vec=obs->get_num_vectors();
221 
222  //CStringFeatures<uint16_t>* old_pos=working->get_observations();
224 
225  if (!result)
226  result=new CRegressionLabels(num_vec);
227 
228  for (int32_t i=0; i<num_vec; i++)
230 
231  //working->set_observations(old_pos);
232  return result;
233 }
234 
235 
237 {
238  ASSERT(working)
239 
241  ui_features->get_test_features();
242  ASSERT(obs)
243 
244  //CStringFeatures<uint16_t>* old_pos=pos->get_observations();
245 
246  pos->set_observations(obs);
247  neg->set_observations(obs);
248 
249  float64_t result=working->model_probability(idx);
250  //working->set_observations(old_pos);
251  return result;
252 }
253 
254 bool CGUIHMM::append_model(char* filename, int32_t base1, int32_t base2)
255 {
256  if (!working)
257  SG_ERROR("Create HMM first.\n")
258  if (!filename)
259  SG_ERROR("Invalid filename.\n")
260 
261  FILE* model_file=fopen(filename, "r");
262  if (!model_file)
263  SG_ERROR("Opening file %s failed.\n", filename)
264 
265  CHMM* h=new CHMM(model_file,PSEUDO);
266  if (!h || !h->get_status())
267  {
268  SG_UNREF(h);
269  fclose(model_file);
270  SG_ERROR("Reading file %s failed.\n", filename)
271  }
272 
273  fclose(model_file);
274  SG_INFO("File %s successfully read.\n", filename)
275 
276  SG_DEBUG("h %d , M: %d\n", h, h->get_M())
277  if (base1!=-1 && base2!=-1)
278  {
279  float64_t* cur_o=SG_MALLOC(float64_t, h->get_M());
280  float64_t* app_o=SG_MALLOC(float64_t, h->get_M());
281 
282  for (int32_t i=0; i<h->get_M(); i++)
283  {
284  if (i==base1)
285  cur_o[i]=0;
286  else
287  cur_o[i]=-1000;
288 
289  if (i==base2)
290  app_o[i]=0;
291  else
292  app_o[i]=-1000;
293  }
294 
295  working->append_model(h, cur_o, app_o);
296 
297  SG_FREE(cur_o);
298  SG_FREE(app_o);
299  }
300  else
301  working->append_model(h);
302 
303  SG_UNREF(h);
304  SG_INFO("New model has %i states.\n", working->get_N())
305  return true;
306 }
307 
308 bool CGUIHMM::add_states(int32_t num_states, float64_t value)
309 {
310  if (!working)
311  SG_ERROR("Create HMM first.\n")
312 
313  working->add_states(num_states, value);
314  SG_INFO("New model has %i states, value %f.\n", working->get_N(), value)
315  return true;
316 }
317 
319 {
320  PSEUDO=pseudo;
321  SG_INFO("Current setting: pseudo=%e.\n", PSEUDO)
322  return true;
323 }
324 
325 bool CGUIHMM::convergence_criteria(int32_t num_iterations, float64_t epsilon)
326 {
327  if (!working)
328  SG_ERROR("Create HMM first.\n")
329 
330  working->set_iterations(num_iterations);
331  working->set_epsilon(epsilon);
332 
333  SG_INFO("Current HMM convergence criteria: iterations=%i, epsilon=%e\n", working->get_iterations(), working->get_epsilon())
334  return true;
335 }
336 
337 bool CGUIHMM::set_hmm_as(char* target)
338 {
339  if (!working)
340  SG_ERROR("Create HMM first!\n")
341 
342  if (strncmp(target, "POS", 3)==0)
343  {
344  SG_UNREF(pos);
345  pos=working;
346  working=NULL;
347  }
348  else if (strncmp(target, "NEG", 3)==0)
349  {
350  SG_UNREF(neg);
351  neg=working;
352  working=NULL;
353  }
354  else if (strncmp(target, "TEST", 4)==0)
355  {
356  SG_UNREF(test);
357  test=working;
358  working=NULL;
359  }
360  else
361  SG_ERROR("Target POS|NEG|TEST is missing.\n")
362 
363  return true;
364 }
365 
366 bool CGUIHMM::load(char* filename)
367 {
368  bool result=false;
369 
370  FILE* model_file=fopen(filename, "r");
371  if (!model_file)
372  SG_ERROR("Opening file %s failed.\n", filename)
373 
374  SG_UNREF(working);
375  working=new CHMM(model_file, PSEUDO);
376  fclose(model_file);
377 
378  if (working && working->get_status())
379  {
380  SG_INFO("Loaded HMM successfully from file %s.\n", filename)
381  result=true;
382  }
383 
384  M=working->get_M();
385 
386  return result;
387 }
388 
389 bool CGUIHMM::save(char* filename, bool is_binary)
390 {
391  bool result=false;
392 
393  if (!working)
394  SG_ERROR("Create HMM first.\n")
395 
396  FILE* file=fopen(filename, "w");
397  if (file)
398  {
399  if (is_binary)
400  result=working->save_model_bin(file);
401  else
402  result=working->save_model(file);
403  }
404 
405  if (!file || !result)
406  SG_ERROR("Writing to file %s failed!\n", filename)
407  else
408  SG_INFO("Successfully written model into %s!\n", filename)
409 
410  if (file)
411  fclose(file);
412 
413  return result;
414 }
415 
416 bool CGUIHMM::load_definitions(char* filename, bool do_init)
417 {
418  if (!working)
419  SG_ERROR("Create HMM first.\n")
420 
421  bool result=false;
422  FILE* def_file=fopen(filename, "r");
423  if (!def_file)
424  SG_ERROR("Opening file %s failed\n", filename)
425 
426  if (working->load_definitions(def_file, true, do_init))
427  {
428  SG_INFO("Definitions successfully read from %s.\n", filename)
429  result=true;
430  }
431  else
432  SG_ERROR("Couldn't load definitions form file %s.\n", filename)
433 
434  fclose(def_file);
435  return result;
436 }
437 
438 bool CGUIHMM::save_likelihood(char* filename, bool is_binary)
439 {
440  bool result=false;
441 
442  if (!working)
443  SG_ERROR("Create HMM first\n")
444 
445  FILE* file=fopen(filename, "w");
446  if (file)
447  {
449  //if (binary)
450  // result=working->save_model_bin(file);
451  //else
452 
453  result=working->save_likelihood(file);
454  }
455 
456  if (!file || !result)
457  SG_ERROR("Writing to file %s failed!\n", filename)
458  else
459  SG_INFO("Successfully written likelihoods into %s!\n", filename)
460 
461  if (file)
462  fclose(file);
463 
464  return result;
465 }
466 
467 bool CGUIHMM::save_path(char* filename, bool is_binary)
468 {
469  bool result=false;
470  if (!working)
471  SG_ERROR("Create HMM first.\n")
472 
473  FILE* file=fopen(filename, "w");
474  if (file)
475  {
477  //if (binary)
478  //_train()/ result=working->save_model_bin(file);
479  //else
481  ui_features->get_test_features();
482  ASSERT(obs)
484 
485  result=working->save_path(file);
486  }
487 
488  if (!file || !result)
489  SG_ERROR("Writing to file %s failed!\n", filename)
490  else
491  SG_INFO("Successfully written path into %s!\n", filename)
492 
493  if (file)
494  fclose(file);
495 
496  return result;
497 }
498 
500 {
501  if (!working)
502  SG_ERROR("Create HMM first.\n")
503 
504  working->chop(value);
505  return true;
506 }
507 
509 {
510  if (!working)
511  SG_ERROR("Create HMM first!\n")
512 
513  working->output_model(false);
514  return true;
515 }
516 
518 {
519  if (!working)
520  SG_ERROR("Create HMM first!\n")
521 
522  working->output_model(true);
523  return true;
524 }
525 
527 {
528  if (!working)
529  SG_ERROR("Create HMM first!\n")
530 
532  return true;
533 }
534 
535 bool CGUIHMM::best_path(int32_t from, int32_t to)
536 {
537  // FIXME: from unused???
538 
539  if (!working)
540  SG_ERROR("Create HMM first.\n")
541 
542  //get path
543  working->best_path(0);
544 
545  for (int32_t t=0; t<working->get_observations()->get_vector_length(0)-1 && t<to; t++)
546  SG_PRINT("%d ", working->get_best_path_state(0, t))
547  SG_PRINT("\n")
548 
549  //for (t=0; t<p_observations->get_vector_length(0)-1 && t<to; t++)
550  // SG_PRINT("%d ", PATH(0)[t])
551  //
552  return true;
553 }
554 
555 bool CGUIHMM::normalize(bool keep_dead_states)
556 {
557  if (!working)
558  SG_ERROR("Create HMM first.\n")
559 
560  working->normalize(keep_dead_states);
561  return true;
562 }
563 
564 bool CGUIHMM::relative_entropy(float64_t*& values, int32_t& len)
565 {
566  if (!pos || !neg)
567  SG_ERROR("Set pos and neg HMM first!\n")
568 
569  int32_t pos_N=pos->get_N();
570  int32_t neg_N=neg->get_N();
571  int32_t pos_M=pos->get_M();
572  int32_t neg_M=neg->get_M();
573  if (pos_M!=neg_M || pos_N!=neg_N)
574  SG_ERROR("Pos and neg HMM's differ in number of emissions or states.\n")
575 
576  float64_t* p=SG_MALLOC(float64_t, pos_M);
577  float64_t* q=SG_MALLOC(float64_t, neg_M);
578 
579  SG_FREE(values);
580  values=SG_MALLOC(float64_t, pos_N);
581 
582  for (int32_t i=0; i<pos_N; i++)
583  {
584  for (int32_t j=0; j<pos_M; j++)
585  {
586  p[j]=pos->get_b(i, j);
587  q[j]=neg->get_b(i, j);
588  }
589 
590  values[i]=CStatistics::relative_entropy(p, q, pos_M);
591  }
592  SG_FREE(p);
593  SG_FREE(q);
594 
595  len=pos_N;
596  return true;
597 }
598 
599 bool CGUIHMM::entropy(float64_t*& values, int32_t& len)
600 {
601  if (!working)
602  SG_ERROR("Create HMM first!\n")
603 
604  int32_t n=working->get_N();
605  int32_t m=working->get_M();
606  float64_t* p=SG_MALLOC(float64_t, m);
607 
608  SG_FREE(values);
609  values=SG_MALLOC(float64_t, n);
610 
611  for (int32_t i=0; i<n; i++)
612  {
613  for (int32_t j=0; j<m; j++)
614  p[j]=working->get_b(i, j);
615 
616  values[i]=CStatistics::entropy(p, m);
617  }
618  SG_FREE(p);
619 
620  len=m;
621  return true;
622 }
623 
624 bool CGUIHMM::permutation_entropy(int32_t width, int32_t seq_num)
625 {
626  if (!working)
627  SG_ERROR("Create hmm first.\n")
628 
629  if (!working->get_observations())
630  SG_ERROR("Set observations first.\n")
631 
632  return working->permutation_entropy(width, seq_num);
633 }
float64_t one_class_classify_example(int32_t idx)
Definition: GUIHMM.cpp:236
#define SG_INFO(...)
Definition: SGIO.h:118
void chop(float64_t value)
set any model parameter with probability smaller than value to ZERO
Definition: HMM.cpp:5123
bool save_likelihood(FILE *file)
Definition: HMM.cpp:4128
bool likelihood()
Definition: GUIHMM.cpp:508
Real Labels are real-valued labels.
float64_t get_epsilon()
Definition: HMM.h:628
int32_t get_M() const
access function for number of observations M
Definition: HMM.h:984
bool save_model(FILE *file)
Definition: HMM.cpp:3978
float64_t PSEUDO
Definition: GUIHMM.h:182
void set_observations(CStringFeatures< uint16_t > *obs, CHMM *hmm=NULL)
Definition: HMM.cpp:5310
bool linear_train(char align='l')
Definition: GUIHMM.cpp:121
CHMM * working
Definition: GUIHMM.h:172
floatmax_t get_max_num_symbols()
bool permutation_entropy(int32_t width=0, int32_t seq_num=-1)
Definition: GUIHMM.cpp:624
bool append_model(char *filename, int32_t base1=-1, int32_t base2=-1)
Definition: GUIHMM.cpp:254
viterbi only for defined transitions/observations
Definition: HMM.h:80
virtual int32_t get_num_vectors() const
standard viterbi
Definition: HMM.h:78
CHMM * pos
Definition: GUIHMM.h:175
baum welch only for defined transitions/observations
Definition: HMM.h:76
bool viterbi_train_defined()
Definition: GUIHMM.cpp:111
bool linear_train(bool right_align=false)
estimates linear model from observations.
Definition: HMM.cpp:5151
bool save_model_bin(FILE *file)
Definition: HMM.cpp:4149
#define SG_ERROR(...)
Definition: SGIO.h:129
static float64_t relative_entropy(float64_t *p, float64_t *q, int32_t len)
float64_t get_b(T_STATES line_, uint16_t column) const
Definition: HMM.h:1157
bool baum_welch_viterbi_train(BaumWelchViterbiType type)
Definition: HMM.cpp:5580
int32_t get_iterations()
Definition: HMM.h:626
floatmax_t get_original_num_symbols()
bool baum_welch_train_defined()
Definition: GUIHMM.cpp:91
bool viterbi_train()
Definition: GUIHMM.cpp:101
bool set_label(int32_t idx, float64_t label)
uint16_t get_best_path_state(int32_t dim, int32_t t)
Definition: HMM.h:563
CStringFeatures< uint16_t > * get_observations()
return observation pointer
Definition: HMM.h:799
bool entropy(float64_t *&values, int32_t &len)
Definition: GUIHMM.cpp:599
static const float64_t epsilon
Definition: libbmrm.cpp:25
standard baum welch
Definition: HMM.h:72
float64_t model_probability(int32_t dimension=-1)
inline proxy for model probability.
Definition: HMM.h:574
bool set_hmm_as(char *target)
Definition: GUIHMM.cpp:337
#define SG_PRINT(...)
Definition: SGIO.h:137
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
CHMM * neg
Definition: GUIHMM.h:177
bool get_status() const
Definition: HMM.h:746
bool baum_welch_train()
Definition: GUIHMM.cpp:52
static float64_t entropy(float64_t *p, int32_t len)
double float64_t
Definition: common.h:50
bool permutation_entropy(int32_t window_width, int32_t sequence_number)
compute permutation entropy
Definition: HMM.cpp:5455
void add_states(int32_t num_states, float64_t default_val=0)
Definition: HMM.cpp:5063
bool convergence_criteria(int32_t num_iterations=100, float64_t epsilon=0.001)
Definition: GUIHMM.cpp:325
bool save_likelihood(char *filename, bool is_binary=false)
Definition: GUIHMM.cpp:438
virtual EFeatureClass get_feature_class() const =0
float64_t classify_example(int32_t idx)
Definition: GUIHMM.cpp:173
bool relative_entropy(float64_t *&values, int32_t &len)
Definition: GUIHMM.cpp:564
float64_t best_path(int32_t dimension)
Definition: HMM.cpp:1154
CRegressionLabels * linear_one_class_classify(CRegressionLabels *output=NULL)
Definition: GUIHMM.cpp:213
bool set_epsilon(float64_t eps)
Definition: HMM.h:627
bool append_model(CHMM *append_model, float64_t *cur_out, float64_t *app_out)
Definition: HMM.cpp:4955
CRegressionLabels * one_class_classify(CRegressionLabels *output=NULL)
Definition: GUIHMM.cpp:191
bool load_definitions(FILE *file, bool verbose, bool initialize=true)
Definition: HMM.cpp:3273
bool set_iterations(int32_t num)
Definition: HMM.h:625
bool add_states(int32_t num_states=1, float64_t value=0)
Definition: GUIHMM.cpp:308
float64_t linear_model_probability(int32_t dimension)
Definition: HMM.h:593
baum welch only for specified transitions
Definition: HMM.h:74
#define SG_UNREF(x)
Definition: SGObject.h:52
#define SG_DEBUG(...)
Definition: SGIO.h:107
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
bool new_hmm(int32_t n, int32_t m)
Definition: GUIHMM.cpp:44
int32_t M
Definition: GUIHMM.h:184
bool load_definitions(char *filename, bool do_init=false)
Definition: GUIHMM.cpp:416
The class Features is the base class of all feature objects.
Definition: Features.h:68
bool load(char *filename)
Definition: GUIHMM.cpp:366
CSGInterface * ui
Definition: GUIHMM.h:188
bool baum_welch_trans_train()
Definition: GUIHMM.cpp:73
CRegressionLabels * classify(CRegressionLabels *output=NULL)
Definition: GUIHMM.cpp:149
bool normalize(bool keep_dead_states=false)
Definition: GUIHMM.cpp:555
bool output_hmm()
Definition: GUIHMM.cpp:517
CHMM * test
Definition: GUIHMM.h:179
bool chop(float64_t value)
Definition: GUIHMM.cpp:499
void output_model_defined(bool verbose=false)
performs output_model only for the defined transitions etc
Definition: HMM.cpp:2340
bool save(char *filename, bool is_binary=false)
Definition: GUIHMM.cpp:389
void output_model(bool verbose=false)
Definition: HMM.cpp:2256
bool output_hmm_defined()
Definition: GUIHMM.cpp:526
bool save_path(char *filename, bool is_binary=false)
Definition: GUIHMM.cpp:467
void normalize(bool keep_dead_states=false)
normalize the model to satisfy stochasticity
Definition: HMM.cpp:4828
bool set_pseudo(float64_t pseudo)
Definition: GUIHMM.cpp:318
Hidden Markov Model.
Definition: HMM.h:369
T_STATES get_N() const
access function for number of states N
Definition: HMM.h:981
bool best_path(int32_t from=0, int32_t to=100)
Definition: GUIHMM.cpp:535
virtual EFeatureType get_feature_type() const =0
bool save_path(FILE *file)
Definition: HMM.cpp:4087
virtual int32_t get_vector_length(int32_t vec_num)

SHOGUN Machine Learning Toolbox - Documentation