23 #ifdef USE_HMMPARALLEL
24 #define USE_HMMPARALLEL_STRUCTURES 1
30 template <
class ST>
class CStringFeatures;
37 #ifndef DOXYGEN_SHOULD_SKIP_THIS
45 T_ALPHA_BETA_TABLE* table;
53 #endif // DOXYGEN_SHOULD_SKIP_THIS
110 inline int32_t
get_learn_a(int32_t line, int32_t column)
const
113 return learn_a[line*2 + column];
119 return learn_b[line*2 + column];
137 return const_a[line*2 + column];
143 return const_b[line*2 + column];
182 inline char get_fix_pos_state(int32_t pos, T_STATES state, T_STATES num_states)
186 if ((pos<0)||(pos*num_states+state>65336))
187 SG_DEBUG(
"index out of range in get_fix_pos_state(%i,%i,%i) \n", pos,state,num_states)
189 return fix_pos_state[pos*num_states+state] ;
198 inline void set_learn_a(int32_t offset, int32_t value)
270 inline void set_fix_pos_state(
272 int32_t pos, T_STATES state, T_STATES num_states,
char value)
275 if ((pos<0)||(pos*num_states+state>65336))
276 SG_DEBUG(
"index out of range in set_fix_pos_state(%i,%i,%i,%i) [%i]\n", pos,state,num_states,(
int)value, pos*num_states+state)
278 fix_pos_state[pos*num_states+state]=value;
279 if (value==FIX_ALLOWED)
280 for (int32_t i=0; i<num_states; i++)
281 if (get_fix_pos_state(pos,i,num_states)==FIX_DEFAULT)
282 set_fix_pos_state(pos,i,num_states,FIX_DISALLOWED) ;
287 const static char FIX_DISALLOWED ;
290 const static char FIX_ALLOWED ;
293 const static char FIX_DEFAULT ;
296 const static float64_t DISALLOWED_PENALTY ;
373 T_STATES trans_list_len ;
374 T_STATES **trans_list_forward ;
375 T_STATES *trans_list_forward_cnt ;
377 T_STATES **trans_list_backward ;
378 T_STATES *trans_list_backward_cnt ;
379 bool mem_initialized ;
381 #ifdef USE_HMMPARALLEL_STRUCTURES
384 struct S_DIM_THREAD_PARAM
392 struct S_BW_THREAD_PARAM
406 inline T_ALPHA_BETA & ALPHA_CACHE(int32_t dim) {
408 inline T_ALPHA_BETA & BETA_CACHE(int32_t dim) {
410 #ifdef USE_LOGSUMARRAY
418 inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim) {
420 inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t dim)
const {
422 inline T_STATES* PATH(int32_t dim) {
424 inline bool & PATH_PROB_UPDATED(int32_t dim) {
426 inline int32_t & PATH_PROB_DIMENSION(int32_t dim) {
429 inline T_ALPHA_BETA & ALPHA_CACHE(int32_t ) {
431 inline T_ALPHA_BETA & BETA_CACHE(int32_t ) {
433 #ifdef USE_LOGSUMARRAY
441 inline T_STATES* STATES_PER_OBSERVATION_PSI(int32_t ) {
443 inline const T_STATES* STATES_PER_OBSERVATION_PSI(int32_t )
const {
445 inline T_STATES* PATH(int32_t ) {
447 inline bool & PATH_PROB_UPDATED(int32_t ) {
449 inline int32_t & PATH_PROB_DIMENSION(int32_t ) {
545 int32_t time, int32_t state, int32_t dimension);
556 int32_t time, int32_t state, int32_t dimension);
603 for (int32_t i=0; i<
N; i++)
644 #ifdef USE_HMMPARALLEL_STRUCTURES
692 void normalize(
bool keep_dead_states=
false);
763 #ifdef USE_HMMPARALLEL_STRUCTURES
764 static void* bw_dim_prefetch(
void * params);
765 static void* bw_single_dim_prefetch(
void * params);
766 static void* vit_dim_prefetch(
void * params);
773 inline bool set_fix_pos_state(int32_t pos, T_STATES state,
char value)
777 model->set_fix_pos_state(pos, state, N, value) ;
959 bool check_path_derivatives() ;
961 #endif //USE_HMMDEBUG
981 inline T_STATES
get_N()
const {
return N ; }
984 inline int32_t
get_M()
const {
return M ; }
994 SG_DEBUG(
"index out of range in set_q(%i,%e) [%i]\n", offset,value,N)
1007 SG_DEBUG(
"index out of range in set_p(%i,.) [%i]\n", offset,N)
1020 if ((line_>N)||(column>N))
1021 SG_DEBUG(
"index out of range in set_A(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1034 if ((line_>N)||(column>N))
1035 SG_DEBUG(
"index out of range in set_a(%i,%i,.) [%i,%i]\n",line_,column,N,N)
1048 if ((line_>=N)||(column>=M))
1049 SG_DEBUG(
"index out of range in set_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1062 if ((line_>=N)||(column>=M))
1063 SG_DEBUG(
"index out of range in set_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1075 int32_t time, T_STATES state, T_STATES value, int32_t dimension)
1081 STATES_PER_OBSERVATION_PSI(dimension)[time*N+state]=value;
1105 SG_DEBUG(
"index out of range in get_p(%i,.) [%i]\n", offset,N)
1118 if ((line_>N)||(column>N))
1119 SG_DEBUG(
"index out of range in get_A(%i,%i) [%i,%i]\n",line_,column,N,N)
1132 if ((line_>N)||(column>N))
1133 SG_DEBUG(
"index out of range in get_a(%i,%i) [%i,%i]\n",line_,column,N,N)
1146 if ((line_>=N)||(column>=M))
1147 SG_DEBUG(
"index out of range in get_B(%i,%i) [%i,%i]\n", line_, column,N,M)
1160 if ((line_>=N)||(column>=M))
1161 SG_DEBUG(
"index out of range in get_b(%i,%i) [%i,%i]\n", line_, column,N,M)
1174 int32_t time, T_STATES state, int32_t dimension)
const
1180 return STATES_PER_OBSERVATION_PSI(dimension)[time*N+state];
1268 #ifdef USE_HMMPARALLEL_STRUCTURES
1273 #else //USE_HMMPARALLEL_STRUCTURES
1278 #endif //USE_HMMPARALLEL_STRUCTURES
1280 #ifdef USE_LOGSUMARRAY
1281 #ifdef USE_HMMPARALLEL_STRUCTURES
1287 #endif // USE_HMMPARALLEL_STRUCTURES
1288 #endif // USE_LOGSUMARRAY
1290 #ifdef USE_HMMPARALLEL_STRUCTURES
1309 #else //USE_HMMPARALLEL_STRUCTURES
1327 #endif //USE_HMMPARALLEL_STRUCTURES
1370 int32_t time, int32_t state, int32_t dimension)
1377 int32_t time, int32_t state_i, int32_t state_j, int32_t dimension)
1379 return forward(time, state_i, dimension) +
1380 backward(time+1, state_j, dimension) +
1394 T_STATES i, uint16_t j, int32_t dimension)
1398 for (int32_t k=0; k<
N; k++)
1400 if (k!=i || p_observations->
get_feature(dimension, k) != j)
1460 return (i==PATH(dimension)[0]) ? (exp(-
get_p(PATH(dimension)[0]))) : (0) ;
1492 bool get_numbuffer(FILE* file,
char* buffer, int32_t length);
1505 inline void error(int32_t p_line,
const char* str)
1508 SG_ERROR(
"error in line %d %s\n", p_line, str)
1517 if (path_deriv_updated && (path_deriv_dimension==dim))
1533 set_A(PATH(dim)[t], PATH(dim)[t+1],
get_A(PATH(dim)[t], PATH(dim)[t+1])+1);
1537 path_deriv_dimension=dim ;
1538 path_deriv_updated=true ;
1548 if (ALPHA_CACHE(dimension).table && (dimension==ALPHA_CACHE(dimension).dimension) && ALPHA_CACHE(dimension).updated)
1550 if (time<p_observations->get_vector_length(dimension))
1551 return ALPHA_CACHE(dimension).table[time*N+state];
1553 return ALPHA_CACHE(dimension).sum;
1562 if (BETA_CACHE(dimension).table && (dimension==BETA_CACHE(dimension).dimension) && (BETA_CACHE(dimension).updated))
1565 return BETA_CACHE(dimension).sum;
1566 if (time<p_observations->get_vector_length(dimension))
1567 return BETA_CACHE(dimension).table[time*N+state];
int32_t get_learn_p(int32_t offset) const
get entry out of learn_p vector
int32_t * learn_p
start states to be learned
virtual int32_t get_max_vector_length()
SGVector< ST > get_feature_vector(int32_t num)
void set_observation_nocache(CStringFeatures< uint16_t > *obs)
float64_t * transition_matrix_a
transition matrix
bool mod_prob_updated
true if model probability is up to date
void chop(float64_t value)
set any model parameter with probability smaller than value to ZERO
float64_t backward_comp(int32_t time, int32_t state, int32_t dimension)
int32_t N
number of states
void convert_to_log()
convert model to log probabilities
float64_t backward(int32_t time, int32_t state, int32_t dimension)
inline proxies for backward pass
static const int32_t GOTp
float64_t get_const_p_val(int32_t offset) const
get value out of const_p_val vector
bool save_likelihood(FILE *file)
void close_bracket(FILE *file)
expect closing bracket
float64_t * const_a_val
values for transitions that have constant probability
virtual const char * get_name() const
virtual int32_t get_num_model_parameters()
Model()
Constructor - initializes all variables/structures.
void set_const_p(int32_t offset, int32_t value)
set value in const_p vector
bool get_numbuffer(FILE *file, char *buffer, int32_t length)
put a sequence of numbers into the buffer
int32_t get_M() const
access function for number of observations M
float64_t pat_prob
probability of best path
float64_t get_const_b_val(int32_t line) const
get value out of const_b_val vector
int32_t get_num_threads() const
bool save_model(FILE *file)
void set_observations(CStringFeatures< uint16_t > *obs, CHMM *hmm=NULL)
void set_const_p_val(int32_t offset, float64_t value)
set value in const_p_val vector
static const float64_t INFTY
infinity
T_STATES * states_per_observation_psi
backtracking table for Viterbi; can be terribly huge: O(T*N)
float64_t forward(int32_t time, int32_t state, int32_t dimension)
inline proxies for forward pass
float64_t * const_q_val
values for end states that have constant probability
bool all_path_prob_updated
true if path probability is up to date
float64_t epsilon
convergence criterion epsilon
void estimate_model_baum_welch_defined(CHMM *train)
static const int32_t GOTconst_p
viterbi only for defined transitions/observations
virtual int32_t get_num_vectors() const
int32_t get_const_p(int32_t offset) const
get entry out of const_p vector
baum welch only for defined transitions/observations
bool linear_train(bool right_align=false)
estimates linear model from observations.
bool save_model_bin(FILE *file)
int32_t * learn_b
emissions to be learned
float64_t * transition_matrix_A
matrix of absolute counts of transitions
static const int32_t GOTlearn_a
float64_t path_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes d log p(lambda,best_path)/d b_ij
T_ALPHA_BETA beta_cache
cache for backward variables; can be terribly huge: O(T*N)
float64_t get_b(T_STATES line_, uint16_t column) const
bool path_prob_updated
true if path probability is up to date
bool save_likelihood_bin(FILE *file)
float64_t state_probability(int32_t time, int32_t state, int32_t dimension)
calculates probability of being in state i at time t for dimension
bool baum_welch_viterbi_train(BaumWelchViterbiType type)
static const int32_t GOTO
float64_t forward_comp_old(int32_t time, int32_t state, int32_t dimension)
float64_t * observation_matrix_B
matrix of absolute counts of observations within each state
float64_t get_A(T_STATES line_, T_STATES column) const
float64_t T_ALPHA_BETA_TABLE
type for alpha/beta caching table
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
Base class Distribution from which all methods implementing a distribution are derived.
bool check_model_derivatives()
numerically check whether derivatives were calculated correctly
float64_t linear_model_derivative(T_STATES i, uint16_t j, int32_t dimension)
static const int32_t GOTlearn_p
void open_bracket(FILE *file)
expect open bracket.
int32_t get_learn_q(int32_t offset) const
get entry out of learn_q vector
float64_t get_pseudo() const
returns current pseudo value
float64_t get_B(T_STATES line_, uint16_t column) const
float64_t * const_b_val
values for emissions that have constant probability
static const int32_t GOTlearn_q
uint16_t get_best_path_state(int32_t dim, int32_t t)
CStringFeatures< uint16_t > * get_observations()
return observation pointer
virtual bool train(CFeatures *data=NULL)
bool save_model_derivatives(FILE *file)
virtual float64_t get_log_model_parameter(int32_t num_param)
void estimate_model_baum_welch_old(CHMM *train)
int32_t path_prob_dimension
dimension for which path_prob was calculated
virtual ~Model()
Destructor - cleans up.
int32_t * learn_q
end states to be learned
bool load_model(FILE *file)
void set_pseudo(float64_t pseudo)
sets current pseudo value
void estimate_model_baum_welch_trans(CHMM *train)
float64_t model_probability(int32_t dimension=-1)
inline proxy for model probability.
int32_t path_deriv_dimension
dimension for which path_deriv was calculated
int32_t * const_a
transitions that have constant probability
static const int32_t GOTb
float64_t mod_prob
probability of model
bool check_model_derivatives_combined()
float64_t model_derivative_q(T_STATES i, int32_t dimension)
float64_t * const_p_val
values for start states that have constant probability
T_STATES get_psi(int32_t time, T_STATES state, int32_t dimension) const
virtual float64_t get_log_likelihood_example(int32_t num_example)
void set_p(T_STATES offset, float64_t value)
static const int32_t GOTconst_a
bool permutation_entropy(int32_t window_width, int32_t sequence_number)
compute permutation entropy
float64_t * end_state_distribution_q
distribution of end-states
void add_states(int32_t num_states, float64_t default_val=0)
float64_t PSEUDO
define pseudocounts against overfitting
float64_t all_pat_prob
probability of best path
float64_t path_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes d log p(lambda,best_path)/d a_ij
void set_const_q_val(int32_t offset, float64_t value)
set value in const_q_val vector
float64_t transition_probability(int32_t time, int32_t state_i, int32_t state_j, int32_t dimension)
calculates probability of being in state i at time t and state j at time t+1 for dimension ...
bool save_model_derivatives_bin(FILE *file)
float64_t get_q(T_STATES offset) const
int32_t * const_q
end states that have constant probability
CStringFeatures< uint16_t > * p_observations
observation matrix
bool save_path_derivatives(FILE *file)
virtual ST get_feature(int32_t vec_num, int32_t feat_num)
void set_learn_a(int32_t offset, int32_t value)
set value in learn_a matrix
void set_learn_q(int32_t offset, int32_t value)
set value in learn_q vector
void set_A(T_STATES line_, T_STATES column, float64_t value)
void set_q(T_STATES offset, float64_t value)
void set_B(T_STATES line_, uint16_t column, float64_t value)
int32_t get_const_q(int32_t offset) const
get entry out of const_q vector
int32_t iterations
convergence criterion iterations
float64_t get_const_q_val(int32_t offset) const
get value out of const_q_val vector
static const int32_t GOTq
static const int32_t GOTlearn_b
float64_t * observation_matrix_b
distribution of observations within each state
T_STATES * get_path(int32_t dim, float64_t &prob)
float64_t model_derivative_a(T_STATES i, T_STATES j, int32_t dimension)
computes log dp(lambda)/d a_ij.
float64_t best_path(int32_t dimension)
int32_t get_const_a(int32_t line, int32_t column) const
get entry out of const_a matrix
float64_t model_probability_comp()
void set_const_b_val(int32_t offset, float64_t value)
set value in const_b_val vector
bool path_deriv_updated
true if path derivative is up to date
virtual float64_t get_log_derivative(int32_t num_param, int32_t num_example)
bool set_epsilon(float64_t eps)
bool append_model(CHMM *append_model, float64_t *cur_out, float64_t *app_out)
void estimate_model_viterbi(CHMM *train)
int32_t M
number of observation symbols, e.g. ACGT -> 0123
void sort_learn_a()
sorts learn_a matrix
void set_learn_p(int32_t offset, int32_t value)
set value in learn_p vector
bool load_definitions(FILE *file, bool verbose, bool initialize=true)
bool set_iterations(int32_t num)
float64_t linear_model_probability(int32_t dimension)
void set_learn_b(int32_t offset, int32_t value)
set value in learn_b matrix
float64_t * initial_state_distribution_p
initial distribution of states
bool save_path_derivatives_bin(FILE *file)
baum welch only for specified transitions
float64_t get_a(T_STATES line_, T_STATES column) const
all of classes and functions are contained in the shogun namespace
void clear_model_defined()
initializes only parameters in learn_x with log(PSEUDO)
float64_t path_derivative_p(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d p_i
float64_t forward_comp(int32_t time, int32_t state, int32_t dimension)
float64_t model_derivative_p(T_STATES i, int32_t dimension)
float64_t get_p(T_STATES offset) const
int32_t * const_b
emissions that have constant probability
bool comma_or_space(FILE *file)
expect comma or space.
void estimate_model_viterbi_defined(CHMM *train)
void sort_learn_b()
sorts learn_b matrix
The class Features is the base class of all feature objects.
void init_model_defined()
bool initialize_hmm(Model *model, float64_t PSEUDO, FILE *model_file=NULL)
static const int32_t GOTconst_b
void free_state_dependend_arrays()
free memory that depends on N
void set_const_a_val(int32_t offset, float64_t value)
set value in const_a_val vector
bool alloc_state_dependend_arrays()
allocates memory that depends on N
void set_const_a(int32_t offset, int32_t value)
set value in const_a matrix
void set_b(T_STATES line_, uint16_t column, float64_t value)
float64_t path_derivative_q(T_STATES i, int32_t dimension)
computes d log p(lambda,best_path)/d q_i
int32_t get_learn_a(int32_t line, int32_t column) const
get entry out of learn_a matrix
float64_t backward_comp_old(int32_t time, int32_t state, int32_t dimension)
static const int32_t GOTM
void prepare_path_derivative(int32_t dim)
initialization function that is called before path_derivatives are calculated
static void sort(int32_t *a, int32_t cols, int32_t sort_col=0)
virtual ~CHMM()
Destructor - Cleanup.
void copy_model(CHMM *l)
copies the model parameters from l
void clear_model()
initializes model with log(PSEUDO)
int32_t get_const_b(int32_t line, int32_t column) const
get entry out of const_b matrix
void output_model_defined(bool verbose=false)
performs output_model only for the defined transitions etc
void output_model(bool verbose=false)
static const int32_t GOTconst_q
void set_psi(int32_t time, T_STATES state, T_STATES value, int32_t dimension)
void set_a(T_STATES line_, T_STATES column, float64_t value)
void normalize(bool keep_dead_states=false)
normalize the model to satisfy stochasticity
static float64_t logarithmic_sum(float64_t p, float64_t q)
static const int32_t GOTN
T_STATES * path
best path (=state sequence) through model
int32_t * const_p
start states that have constant probability
void init_model_random()
init model with random values
void set_const_b(int32_t offset, int32_t value)
set value in const_b matrix
T_STATES get_N() const
access function for number of states N
void set_const_q(int32_t offset, int32_t value)
set value in const_q vector
int32_t get_learn_b(int32_t line, int32_t column) const
get entry out of learn_b matrix
void estimate_model_baum_welch(CHMM *train)
int32_t * learn_a
transitions to be learned
void error(int32_t p_line, const char *str)
parse error messages
bool save_path(FILE *file)
virtual int32_t get_vector_length(int32_t vec_num)
float64_t model_derivative_b(T_STATES i, uint16_t j, int32_t dimension)
computes log dp(lambda)/d b_ij.
float64_t get_const_a_val(int32_t line) const
get value out of const_a_val vector
T_ALPHA_BETA alpha_cache
cache for forward variables; can be terribly huge: O(T*N)
static const int32_t GOTa