Several functions are supplied for tasks such as training, reading/writing models, reading observations, and calculating derivatives.
Public Member Functions |
| CHMM (void) |
bool | alloc_state_dependend_arrays () |
| allocates memory that depends on N
|
void | free_state_dependend_arrays () |
| frees memory that depends on N
|
bool | linear_train (bool right_align=false) |
| estimates linear model from observations.
|
bool | permutation_entropy (int32_t window_width, int32_t sequence_number) |
| compute permutation entropy
|
virtual const char * | get_name () const |
|
|
| CHMM (int32_t N, int32_t M, Model *model, float64_t PSEUDO) |
| CHMM (CStringFeatures< uint16_t > *obs, int32_t N, int32_t M, float64_t PSEUDO) |
| CHMM (int32_t N, float64_t *p, float64_t *q, float64_t *a) |
| CHMM (int32_t N, float64_t *p, float64_t *q, int32_t num_trans, float64_t *a_trans) |
| CHMM (FILE *model_file, float64_t PSEUDO) |
| CHMM (CHMM *h) |
| Constructor - Clone model h.
|
virtual | ~CHMM () |
| Destructor - Cleanup.
|
virtual bool | train (CFeatures *data=NULL) |
virtual int32_t | get_num_model_parameters () |
virtual float64_t | get_log_model_parameter (int32_t num_param) |
virtual float64_t | get_log_derivative (int32_t num_param, int32_t num_example) |
virtual float64_t | get_log_likelihood_example (int32_t num_example) |
bool | initialize (Model *model, float64_t PSEUDO, FILE *model_file=NULL) |
|
forward/backward/viterbi algorithm
|
float64_t | forward_comp (int32_t time, int32_t state, int32_t dimension) |
float64_t | forward_comp_old (int32_t time, int32_t state, int32_t dimension) |
float64_t | backward_comp (int32_t time, int32_t state, int32_t dimension) |
float64_t | backward_comp_old (int32_t time, int32_t state, int32_t dimension) |
float64_t | best_path (int32_t dimension) |
uint16_t | get_best_path_state (int32_t dim, int32_t t) |
float64_t | model_probability_comp () |
float64_t | model_probability (int32_t dimension=-1) |
| inline proxy for model probability.
|
float64_t | linear_model_probability (int32_t dimension) |
|
|
bool | set_iterations (int32_t num) |
int32_t | get_iterations () |
bool | set_epsilon (float64_t eps) |
float64_t | get_epsilon () |
bool | baum_welch_viterbi_train (BaumWelchViterbiType type) |
|
|
void | estimate_model_baum_welch (CHMM *train) |
void | estimate_model_baum_welch_trans (CHMM *train) |
void | estimate_model_baum_welch_old (CHMM *train) |
void | estimate_model_baum_welch_defined (CHMM *train) |
void | estimate_model_viterbi (CHMM *train) |
void | estimate_model_viterbi_defined (CHMM *train) |
|
|
void | output_model (bool verbose=false) |
void | output_model_defined (bool verbose=false) |
| performs output_model only for the defined transitions etc
|
|
|
void | normalize (bool keep_dead_states=false) |
| normalize the model to satisfy stochasticity
|
void | add_states (int32_t num_states, float64_t default_val=0) |
bool | append_model (CHMM *append_model, float64_t *cur_out, float64_t *app_out) |
bool | append_model (CHMM *append_model) |
void | chop (float64_t value) |
| set any model parameter with probability smaller than value to ZERO
|
void | convert_to_log () |
| convert model to log probabilities
|
void | init_model_random () |
| init model with random values
|
void | init_model_defined () |
void | clear_model () |
| initializes model with log(PSEUDO)
|
void | clear_model_defined () |
| initializes only parameters in learn_x with log(PSEUDO)
|
void | copy_model (CHMM *l) |
| copies the model parameters from l
|
void | invalidate_model () |
bool | get_status () const |
float64_t | get_pseudo () const |
| returns current pseudo value
|
void | set_pseudo (float64_t pseudo) |
| sets current pseudo value
|
|
|
void | set_observations (CStringFeatures< uint16_t > *obs, CHMM *hmm=NULL) |
void | set_observation_nocache (CStringFeatures< uint16_t > *obs) |
CStringFeatures< uint16_t > * | get_observations () |
| return observation pointer
|
|
for observations/model/traindefinitions
|
bool | load_definitions (FILE *file, bool verbose, bool initialize=true) |
bool | load_model (FILE *file) |
bool | save_model (FILE *file) |
bool | save_model_derivatives (FILE *file) |
bool | save_model_derivatives_bin (FILE *file) |
bool | save_model_bin (FILE *file) |
bool | check_model_derivatives () |
| numerically check whether derivatives were calculated correctly
|
bool | check_model_derivatives_combined () |
T_STATES * | get_path (int32_t dim, float64_t &prob) |
bool | save_path (FILE *file) |
bool | save_path_derivatives (FILE *file) |
bool | save_path_derivatives_bin (FILE *file) |
bool | save_likelihood_bin (FILE *file) |
bool | save_likelihood (FILE *file) |
|
for all the arrays a,b,p,q,A,B,psi and scalar model parameters like N,M
|
T_STATES | get_N () const |
| access function for number of states N
|
int32_t | get_M () const |
| access function for number of observations M
|
void | set_q (T_STATES offset, float64_t value) |
void | set_p (T_STATES offset, float64_t value) |
void | set_A (T_STATES line_, T_STATES column, float64_t value) |
void | set_a (T_STATES line_, T_STATES column, float64_t value) |
void | set_B (T_STATES line_, uint16_t column, float64_t value) |
void | set_b (T_STATES line_, uint16_t column, float64_t value) |
void | set_psi (int32_t time, T_STATES state, T_STATES value, int32_t dimension) |
float64_t | get_q (T_STATES offset) const |
float64_t | get_p (T_STATES offset) const |
float64_t | get_A (T_STATES line_, T_STATES column) const |
float64_t | get_a (T_STATES line_, T_STATES column) const |
float64_t | get_B (T_STATES line_, uint16_t column) const |
float64_t | get_b (T_STATES line_, uint16_t column) const |
T_STATES | get_psi (int32_t time, T_STATES state, int32_t dimension) const |
|
management and access functions for observation matrix
|
float64_t | state_probability (int32_t time, int32_t state, int32_t dimension) |
| calculates probability of being in state i at time t for dimension
|
float64_t | transition_probability (int32_t time, int32_t state_i, int32_t state_j, int32_t dimension) |
| calculates probability of being in state i at time t and state j at time t+1 for dimension
|
|
computes log dp(lambda)/d lambda_i
- Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
float64_t | linear_model_derivative (T_STATES i, uint16_t j, int32_t dimension) |
float64_t | model_derivative_p (T_STATES i, int32_t dimension) |
float64_t | model_derivative_q (T_STATES i, int32_t dimension) |
float64_t | model_derivative_a (T_STATES i, T_STATES j, int32_t dimension) |
| computes log dp(lambda)/d a_ij.
|
float64_t | model_derivative_b (T_STATES i, uint16_t j, int32_t dimension) |
| computes log dp(lambda)/d b_ij.
|
|
computes d log p(lambda,best_path)/d lambda_i
- Parameters:
-
| dimension | dimension for which derivatives are calculated |
| i,j | parameter specific |
|
float64_t | path_derivative_p (T_STATES i, int32_t dimension) |
| computes d log p(lambda,best_path)/d p_i
|
float64_t | path_derivative_q (T_STATES i, int32_t dimension) |
| computes d log p(lambda,best_path)/d q_i
|
float64_t | path_derivative_a (T_STATES i, T_STATES j, int32_t dimension) |
| computes d log p(lambda,best_path)/d a_ij
|
float64_t | path_derivative_b (T_STATES i, uint16_t j, int32_t dimension) |
| computes d log p(lambda,best_path)/d b_ij
|
Protected Member Functions |
void | prepare_path_derivative (int32_t dim) |
| initialization function that is called before path_derivatives are calculated
|
float64_t | forward (int32_t time, int32_t state, int32_t dimension) |
| inline proxies for forward pass
|
float64_t | backward (int32_t time, int32_t state, int32_t dimension) |
| inline proxies for backward pass
|
|
for reading model/definition/observation files
|
bool | get_numbuffer (FILE *file, char *buffer, int32_t length) |
| put a sequence of numbers into the buffer
|
void | open_bracket (FILE *file) |
| expect open bracket.
|
void | close_bracket (FILE *file) |
| expect closing bracket
|
bool | comma_or_space (FILE *file) |
| expect comma or space.
|
void | error (int32_t p_line, const char *str) |
| parse error messages
|
Protected Attributes |
float64_t * | arrayN1 |
float64_t * | arrayN2 |
T_ALPHA_BETA | alpha_cache |
| cache for forward variables; can be terribly HUGE, O(T*N)
|
T_ALPHA_BETA | beta_cache |
| cache for backward variables; can be terribly HUGE, O(T*N)
|
T_STATES * | states_per_observation_psi |
| backtracking table for viterbi; can be terribly HUGE, O(T*N)
|
T_STATES * | path |
| best path (=state sequence) through model
|
bool | path_prob_updated |
| true if path probability is up to date
|
int32_t | path_prob_dimension |
| dimension for which path_prob was calculated
|
|
these are p,q,a,b,N,M etc
|
int32_t | M |
| number of observation symbols, e.g. ACGT -> 0123
|
int32_t | N |
| number of states
|
float64_t | PSEUDO |
| define pseudocounts against overfitting
|
int32_t | line |
CStringFeatures< uint16_t > * | p_observations |
| observation matrix
|
Model * | model |
float64_t * | transition_matrix_A |
| matrix of absolute counts of transitions
|
float64_t * | observation_matrix_B |
| matrix of absolute counts of observations within each state
|
float64_t * | transition_matrix_a |
| transition matrix
|
float64_t * | initial_state_distribution_p |
| initial distribution of states
|
float64_t * | end_state_distribution_q |
| distribution of end-states
|
float64_t * | observation_matrix_b |
| distribution of observations within each state
|
int32_t | iterations |
| convergence criterion iterations
|
int32_t | iteration_count |
float64_t | epsilon |
| convergence criterion epsilon
|
int32_t | conv_it |
float64_t | all_pat_prob |
| probability of best path
|
float64_t | pat_prob |
| probability of best path
|
float64_t | mod_prob |
| probability of model
|
bool | mod_prob_updated |
| true if model probability is up to date
|
bool | all_path_prob_updated |
| true if path probability is up to date
|
int32_t | path_deriv_dimension |
| dimension for which path_deriv was calculated
|
bool | path_deriv_updated |
| true if path derivative is up to date
|
bool | loglikelihood |
bool | status |
bool | reused_caches |
Static Protected Attributes |
static const int32_t | GOTN = (1<<1) |
static const int32_t | GOTM = (1<<2) |
static const int32_t | GOTO = (1<<3) |
static const int32_t | GOTa = (1<<4) |
static const int32_t | GOTb = (1<<5) |
static const int32_t | GOTp = (1<<6) |
static const int32_t | GOTq = (1<<7) |
static const int32_t | GOTlearn_a = (1<<1) |
static const int32_t | GOTlearn_b = (1<<2) |
static const int32_t | GOTlearn_p = (1<<3) |
static const int32_t | GOTlearn_q = (1<<4) |
static const int32_t | GOTconst_a = (1<<5) |
static const int32_t | GOTconst_b = (1<<6) |
static const int32_t | GOTconst_p = (1<<7) |
static const int32_t | GOTconst_q = (1<<8) |
bool load_definitions |
( |
FILE * |
file, |
|
|
bool |
verbose, |
|
|
bool |
initialize = true | |
|
) |
| | |
read definitions file (learn_x,const_x) used for training. -format specs: definition_file (train.def) % HMM-TRAIN - specification % learn_a - elements in state_transition_matrix to be learned % learn_b - elements in observation_per_state_matrix to be learned % note: each line stands for % state, observation(0), observation(1)...observation(NOW) % learn_p - elements in initial distribution to be learned % learn_q - elements in the end-state distribution to be learned % % const_x - specifies initial values of elements % rest is assumed to be 0.0 % % NOTE: IMPLICIT DEFINES: % define A 0 % define C 1 % define G 2 % define T 3
learn_a=[ [int32_t,int32_t]; [int32_t,int32_t]; [int32_t,int32_t]; ........ [int32_t,int32_t]; [-1,-1]; ];
learn_b=[ [int32_t,int32_t,int32_t,...,int32_t]; [int32_t,int32_t,int32_t,...,int32_t]; [int32_t,int32_t,int32_t,...,int32_t]; ........ [int32_t,int32_t,int32_t,...,int32_t]; [-1,-1]; ];
learn_p= [ int32_t, ... , int32_t, -1 ];
learn_q= [ int32_t, ... , int32_t, -1 ];
const_a=[ [int32_t,int32_t,float64_t]; [int32_t,int32_t,float64_t]; [int32_t,int32_t,float64_t]; ........ [int32_t,int32_t,float64_t]; [-1,-1,-1]; ];
const_b=[ [int32_t,int32_t,int32_t,...,int32_t,float64_t]; [int32_t,int32_t,int32_t,...,int32_t,float64_t]; [int32_t,int32_t,int32_t,...,int32_t,float64_t]; ........ [int32_t,int32_t,int32_t,...,int32_t,float64_t]; [-1,-1,-1]; ];
const_p[]=[ [int32_t, float64_t], ... , [int32_t,float64_t], [-1,-1] ]; const_q[]=[ [int32_t, float64_t], ... , [int32_t,float64_t], [-1,-1] ];
- Parameters:
-
| file | filehandle to definitions file |
| verbose | true for verbose messages |
| initialize | true to initialize to underlying HMM |
Definition at line 3224 of file HMM.cpp.