Dynamic Programming Class.
Structure and Function collection. This class implements dynamic programming functions.
Definition at line 77 of file DynProg.h.
Public Member Functions | |
CDynProg (int32_t p_num_svms=8) | |
virtual | ~CDynProg () |
void | set_num_states (int32_t N) |
int32_t | get_num_states () |
int32_t | get_num_svms () |
void | init_content_svm_value_array (const int32_t p_num_svms) |
void | init_tiling_data (int32_t *probe_pos, float64_t *intensities, const int32_t num_probes) |
void | precompute_tiling_plifs (CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs) |
void | resize_lin_feat (int32_t num_new_feat) |
void | set_p_vector (SGVector< float64_t > p) |
void | set_q_vector (SGVector< float64_t > q) |
void | set_a (SGMatrix< float64_t > a) |
void | set_a_id (SGMatrix< int32_t > a) |
void | set_a_trans_matrix (SGMatrix< float64_t > a_trans) |
void | init_mod_words_array (SGMatrix< int32_t > p_mod_words_array) |
bool | check_svm_arrays () |
void | set_observation_matrix (SGNDArray< float64_t > seq) |
int32_t | get_num_positions () |
void | set_content_type_array (SGMatrix< float64_t > seg_path) |
void | set_pos (SGVector< int32_t > pos) |
void | set_orf_info (SGMatrix< int32_t > orf_info) |
void | set_gene_string (SGVector< char > genestr) |
void | set_dict_weights (SGMatrix< float64_t > dictionary_weights) |
void | best_path_set_segment_loss (SGMatrix< float64_t > segment_loss) |
void | best_path_set_segment_ids_mask (int32_t *segment_ids, float64_t *segment_mask, int32_t m) |
void | set_sparse_features (CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2) |
void | set_plif_matrices (CPlifMatrix *pm) |
SGVector< float64_t > | get_scores () |
SGMatrix< int32_t > | get_states () |
SGMatrix< int32_t > | get_positions () |
void | compute_nbest_paths (int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences) |
void | best_path_trans_deriv (int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals) |
void | set_my_state_seq (int32_t *my_state_seq) |
void | set_my_pos_seq (int32_t *my_pos_seq) |
void | get_path_scores (float64_t **my_scores, int32_t *seq_len) |
void | get_path_losses (float64_t **my_losses, int32_t *seq_len) |
T_STATES | get_N () const |
access function for number of states N | |
void | set_q (T_STATES offset, float64_t value) |
void | set_p (T_STATES offset, float64_t value) |
void | set_a (T_STATES line_, T_STATES column, float64_t value) |
float64_t | get_q (T_STATES offset) const |
float64_t | get_q_deriv (T_STATES offset) const |
float64_t | get_p (T_STATES offset) const |
float64_t | get_p_deriv (T_STATES offset) const |
void | precompute_content_values () |
float64_t * | get_lin_feat (int32_t &dim1, int32_t &dim2) |
void | set_lin_feat (float64_t *p_lin_feat, int32_t p_num_svms, int32_t p_seq_len) |
void | create_word_string () |
void | precompute_stop_codons () |
float64_t | get_a (T_STATES line_, T_STATES column) const |
float64_t | get_a_deriv (T_STATES line_, T_STATES column) const |
void | set_intron_list (CIntronList *intron_list, int32_t num_plifs) |
CSegmentLoss * | get_segment_loss_object () |
void | long_transition_settings (bool use_long_transitions, int32_t threshold, int32_t max_len) |
Protected Member Functions | |
void | lookup_content_svm_values (const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame) |
void | lookup_tiling_plif_values (const int32_t from_state, const int32_t to_state, const int32_t len, float64_t *svm_values) |
int32_t | find_frame (const int32_t from_state) |
int32_t | raw_intensities_interval_query (const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type) |
bool | extend_orf (int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to) |
virtual const char * | get_name () const |
Protected Attributes | |
int32_t | m_num_degrees |
int32_t | m_num_svms |
CArray< int32_t > | m_word_degree |
CArray< int32_t > | m_cum_num_words |
int32_t * | m_cum_num_words_array |
CArray< int32_t > | m_num_words |
int32_t * | m_num_words_array |
CArray2< int32_t > | m_mod_words |
int32_t * | m_mod_words_array |
CArray< bool > | m_sign_words |
bool * | m_sign_words_array |
CArray< int32_t > | m_string_words |
int32_t * | m_string_words_array |
CArray< int32_t > | m_num_unique_words |
bool | m_svm_arrays_clean |
int32_t | m_max_a_id |
CArray3< float64_t > | m_observation_matrix |
CArray< int32_t > | m_pos |
int32_t | m_seq_len |
CArray2< int32_t > | m_orf_info |
CArray2< float64_t > | m_segment_sum_weights |
CArray< CPlifBase * > | m_plif_list |
CArray2< CPlifBase * > | m_PEN |
CArray2< CPlifBase * > | m_PEN_state_signals |
CArray< char > | m_genestr |
uint16_t *** | m_wordstr |
CArray2< float64_t > | m_dict_weights |
CArray3< float64_t > | m_segment_loss |
CArray< int32_t > | m_segment_ids |
CArray< float64_t > | m_segment_mask |
CArray< int32_t > | m_my_state_seq |
CArray< int32_t > | m_my_pos_seq |
CArray< float64_t > | m_my_scores |
CArray< float64_t > | m_my_losses |
CSegmentLoss * | m_seg_loss_obj |
CArray< float64_t > | m_scores |
CArray2< int32_t > | m_states |
CArray2< int32_t > | m_positions |
CSparseFeatures< float64_t > * | m_seq_sparse1 |
CSparseFeatures< float64_t > * | m_seq_sparse2 |
CPlifMatrix * | m_plif_matrices |
CArray< bool > | m_genestr_stop |
CIntronList * | m_intron_list |
int32_t | m_num_intron_plifs |
CArray2< float64_t > | m_lin_feat |
float64_t * | m_raw_intensities |
int32_t * | m_probe_pos |
int32_t * | m_num_probes_cum |
int32_t * | m_num_lin_feat_plifs_cum |
int32_t | m_num_raw_data |
bool | m_long_transitions |
int32_t | m_long_transition_threshold |
model specific variables. | |
int32_t | m_N |
number of states | |
CArray2< int32_t > | m_transition_matrix_a_id |
transition matrix | |
CArray2< float64_t > | m_transition_matrix_a |
CArray2< float64_t > | m_transition_matrix_a_deriv |
CArray< float64_t > | m_initial_state_distribution_p |
initial distribution of states | |
CArray< float64_t > | m_initial_state_distribution_p_deriv |
CArray< float64_t > | m_end_state_distribution_q |
distribution of end-states | |
CArray< float64_t > | m_end_state_distribution_q_deriv |
Static Protected Attributes | |
static int32_t | word_degree_default [4] = {3,4,5,6} |
static int32_t | cum_num_words_default [5] = {0,64,320,1344,5440} |
static int32_t | frame_plifs [3] = {4,5,6} |
static int32_t | num_words_default [4] = {64,256,1024,4096} |
static int32_t | mod_words_default [32] |
static bool | sign_words_default [16] |
static int32_t | string_words_default [16] |
CDynProg | ( | int32_t | p_num_svms = 8 |
) |
~CDynProg | ( | ) | [virtual] |
Definition at line 147 of file DynProg.cpp.
void best_path_set_segment_ids_mask | ( | int32_t * | segment_ids, | |
float64_t * | segment_mask, | |||
int32_t | m | |||
) |
set best path segment ids mask
segment_ids | segment ids | |
segment_mask | segment mask | |
m | dimension m |
Definition at line 820 of file DynProg.cpp.
set best path segment loss
segment_loss | segment loss |
Definition at line 803 of file DynProg.cpp.
void best_path_trans_deriv | ( | int32_t * | my_state_seq, | |
int32_t * | my_pos_seq, | |||
int32_t | my_seq_len, | |||
const float64_t * | seq_array, | |||
int32_t | max_num_signals | |||
) |
given a path through the state model and the corresponding positions, compute the features. This can be seen as the derivative of the score (output of the dynamic program) with respect to the parameters.
my_state_seq | state sequence of the path | |
my_pos_seq | sequence of positions | |
my_seq_len | length of state and position sequences | |
seq_array | array of features | |
max_num_signals | maximal number of signals |
Definition at line 2053 of file DynProg.cpp.
bool check_svm_arrays | ( | ) |
check SVM arrays; call this function to check consistency
Definition at line 606 of file DynProg.cpp.
void compute_nbest_paths | ( | int32_t | max_num_signals, | |
bool | use_orf, | |||
int16_t | nbest, | |||
bool | with_loss, | |||
bool | with_multiple_sequences | |||
) |
run the viterbi algorithm to compute the n best viterbi paths
max_num_signals | maximal number of signals for a single state | |
use_orf | whether orf shall be used | |
nbest | number of best paths (n) | |
with_loss | use loss | |
with_multiple_sequences | !!!not functional; set to false!!! |
Definition at line 949 of file DynProg.cpp.
void create_word_string | ( | ) |
create word string from char* Jonas
Definition at line 366 of file DynProg.cpp.
bool extend_orf | ( | int32_t | orf_from, | |
int32_t | orf_to, | |||
int32_t | start, | |||
int32_t & | last_pos, | |||
int32_t | to | |||
) | [protected] |
extend orf
orf_from | orf from | |
orf_to | orf to | |
start | start | |
last_pos | last position | |
to | to |
Definition at line 898 of file DynProg.cpp.
int32_t find_frame | ( | const int32_t | from_state | ) | [protected] |
find frame
from_state | from state |
float64_t* get_lin_feat | ( | int32_t & | dim1, | |
int32_t & | dim2 | |||
) |
T_STATES get_N | ( | ) | const |
virtual const char* get_name | ( | void | ) | const [protected, virtual] |
int32_t get_num_positions | ( | ) |
get number of positions; the dynamic program is sparse encoded and this function gives the number of positions that can actually be part of a predicted path
Definition at line 684 of file DynProg.cpp.
int32_t get_num_states | ( | ) |
get num states
Definition at line 239 of file DynProg.cpp.
int32_t get_num_svms | ( | ) |
get num svms
Definition at line 195 of file DynProg.cpp.
void get_path_losses | ( | float64_t ** | my_losses, | |
int32_t * | seq_len | |||
) |
get path losses
best_path_trans_deriv result retrieval functions
my_losses | my losses | |
seq_len | length of sequence |
Definition at line 882 of file DynProg.cpp.
void get_path_scores | ( | float64_t ** | my_scores, | |
int32_t * | seq_len | |||
) |
get path scores
best_path_trans_deriv result retrieval functions
my_scores | scores | |
seq_len | length of sequence |
Definition at line 868 of file DynProg.cpp.
SGMatrix< int32_t > get_positions | ( | ) |
CSegmentLoss* get_segment_loss_object | ( | ) |
SGMatrix< int32_t > get_states | ( | ) |
void init_content_svm_value_array | ( | const int32_t | p_num_svms | ) |
init CArray for precomputed content svm values with size seq_len x num_svms
p_num_svms | number of svm weight vectors for content prediction |
Definition at line 274 of file DynProg.cpp.
void init_mod_words_array | ( | SGMatrix< int32_t > | p_mod_words_array | ) |
init mod words array
p_mod_words_array | new mod words array |
Definition at line 584 of file DynProg.cpp.
void init_tiling_data | ( | int32_t * | probe_pos, | |
float64_t * | intensities, | |||
const int32_t | num_probes | |||
) |
init CArray for precomputed tiling intensity-plif-values with size seq_len x num_svms
probe_pos | local positions of probes | |
intensities | intensities of probes | |
num_probes | number of probes |
Definition at line 244 of file DynProg.cpp.
void long_transition_settings | ( | bool | use_long_transitions, | |
int32_t | threshold, | |||
int32_t | max_len | |||
) |
void lookup_content_svm_values | ( | const int32_t | from_state, | |
const int32_t | to_state, | |||
const int32_t | from_pos, | |||
const int32_t | to_pos, | |||
float64_t * | svm_values, | |||
int32_t | frame | |||
) | [protected] |
lookup content SVM values
from_state | from state | |
to_state | to state | |
from_pos | from position | |
to_pos | to position | |
svm_values | SVM values | |
frame | frame |
Definition at line 2481 of file DynProg.cpp.
void lookup_tiling_plif_values | ( | const int32_t | from_state, | |
const int32_t | to_state, | |||
const int32_t | len, | |||
float64_t * | svm_values | |||
) | [protected] |
lookup tiling Plif values
from_state | from state | |
to_state | to state | |
len | length | |
svm_values | SVM values |
void precompute_content_values | ( | ) |
create array of precomputed content svm values
Definition at line 397 of file DynProg.cpp.
void precompute_stop_codons | ( | ) |
precompute stop codons
Definition at line 200 of file DynProg.cpp.
void precompute_tiling_plifs | ( | CPlif ** | PEN, | |
const int32_t * | tiling_plif_ids, | |||
const int32_t | num_tiling_plifs | |||
) |
precompute tiling Plifs
PEN | Plif PEN | |
tiling_plif_ids | tiling plif id's | |
num_tiling_plifs | number of tiling plifs |
Definition at line 317 of file DynProg.cpp.
int32_t raw_intensities_interval_query | ( | const int32_t | from_pos, | |
const int32_t | to_pos, | |||
float64_t * | intensities, | |||
int32_t | type | |||
) | [protected] |
raw intensities interval query
from_pos | from position | |
to_pos | to position | |
intensities | intensities | |
type | type |
Definition at line 2455 of file DynProg.cpp.
void resize_lin_feat | ( | int32_t | num_new_feat | ) |
append rows to linear features array
num_new_feat | number of new rows to add |
Definition at line 284 of file DynProg.cpp.
void set_a_id | ( | SGMatrix< int32_t > | a | ) |
set a transition matrix
a_trans | transition matrix a |
Definition at line 483 of file DynProg.cpp.
set an array of length #(candidate positions) which specifies the content type of each pos and a mask that determines to which extent the loss should be applied to this position; this is a way to encode label confidence via weights between zero and one
seg_path | seg path |
Definition at line 689 of file DynProg.cpp.
set best path dict weights
dictionary_weights | dictionary weights |
Definition at line 784 of file DynProg.cpp.
void set_gene_string | ( | SGVector< char > | genestr | ) |
void set_intron_list | ( | CIntronList * | intron_list, | |
int32_t | num_plifs | |||
) |
set intron list
intron_list | ||
num_plifs | number of intron plifs |
Definition at line 2534 of file DynProg.cpp.
void set_lin_feat | ( | float64_t * | p_lin_feat, | |
int32_t | p_num_svms, | |||
int32_t | p_seq_len | |||
) |
void set_my_pos_seq | ( | int32_t * | my_pos_seq | ) |
set best path my position sequence
my_pos_seq | my position sequence |
Definition at line 776 of file DynProg.cpp.
void set_my_state_seq | ( | int32_t * | my_state_seq | ) |
set best path my state sequence
my_state_seq | my state sequence |
Definition at line 768 of file DynProg.cpp.
void set_num_states | ( | int32_t | N | ) |
set number of states; use this to set N first
N | new N |
Definition at line 223 of file DynProg.cpp.
void set_orf_info | ( | SGMatrix< int32_t > | orf_info | ) |
set best path orf info; only for compute_nbest_paths
orf_info | the orf info |
Definition at line 728 of file DynProg.cpp.
void set_plif_matrices | ( | CPlifMatrix * | pm | ) |
void set_pos | ( | SGVector< int32_t > | pos | ) |
void set_sparse_features | ( | CSparseFeatures< float64_t > * | seq_sparse1, | |
CSparseFeatures< float64_t > * | seq_sparse2 | |||
) |
set sparse feature matrices
Definition at line 737 of file DynProg.cpp.
int32_t cum_num_words_default = {0,64,320,1344,5440} [static, protected] |
int32_t frame_plifs = {4,5,6} [static, protected] |
CArray<int32_t> m_cum_num_words [protected] |
int32_t* m_cum_num_words_array [protected] |
CArray2<float64_t> m_dict_weights [protected] |
CArray<float64_t> m_end_state_distribution_q [protected] |
CArray<float64_t> m_end_state_distribution_q_deriv [protected] |
CArray<bool> m_genestr_stop [protected] |
CArray<float64_t> m_initial_state_distribution_p [protected] |
CArray<float64_t> m_initial_state_distribution_p_deriv [protected] |
CIntronList* m_intron_list [protected] |
CArray2<float64_t> m_lin_feat [protected] |
int32_t m_long_transition_threshold [protected] |
bool m_long_transitions [protected] |
int32_t m_max_a_id [protected] |
CArray2<int32_t> m_mod_words [protected] |
int32_t* m_mod_words_array [protected] |
CArray<float64_t> m_my_losses [protected] |
CArray<int32_t> m_my_pos_seq [protected] |
CArray<float64_t> m_my_scores [protected] |
CArray<int32_t> m_my_state_seq [protected] |
int32_t m_num_degrees [protected] |
int32_t m_num_intron_plifs [protected] |
int32_t* m_num_lin_feat_plifs_cum [protected] |
int32_t* m_num_probes_cum [protected] |
int32_t m_num_raw_data [protected] |
int32_t m_num_svms [protected] |
CArray<int32_t> m_num_unique_words [protected] |
CArray<int32_t> m_num_words [protected] |
int32_t* m_num_words_array [protected] |
CArray3<float64_t> m_observation_matrix [protected] |
CArray2<int32_t> m_orf_info [protected] |
CArray2<CPlifBase*> m_PEN_state_signals [protected] |
CArray<CPlifBase*> m_plif_list [protected] |
CPlifMatrix* m_plif_matrices [protected] |
CArray2<int32_t> m_positions [protected] |
int32_t* m_probe_pos [protected] |
float64_t* m_raw_intensities [protected] |
CSegmentLoss* m_seg_loss_obj [protected] |
CArray<int32_t> m_segment_ids [protected] |
CArray3<float64_t> m_segment_loss [protected] |
CArray<float64_t> m_segment_mask [protected] |
CArray2<float64_t> m_segment_sum_weights [protected] |
int32_t m_seq_len [protected] |
CSparseFeatures<float64_t>* m_seq_sparse1 [protected] |
CSparseFeatures<float64_t>* m_seq_sparse2 [protected] |
CArray<bool> m_sign_words [protected] |
bool* m_sign_words_array [protected] |
CArray<int32_t> m_string_words [protected] |
int32_t* m_string_words_array [protected] |
bool m_svm_arrays_clean [protected] |
CArray2<float64_t> m_transition_matrix_a [protected] |
CArray2<float64_t> m_transition_matrix_a_deriv [protected] |
CArray2<int32_t> m_transition_matrix_a_id [protected] |
CArray<int32_t> m_word_degree [protected] |
uint16_t*** m_wordstr [protected] |
wordstr is a vector of L n-gram indices, with wordstr(i) representing a number between 0 and 4095 corresponding to the 6-mer in genestr(i-5:i). pos is a vector of candidate transition positions (it is input to compute_nbest_paths). t_end is some index in pos.
svs has been initialized by init_svm_values
At the end of this procedure, svs.svm_values[i+s*svs.seqlen] has the value of the s-th SVM on genestr(pos(t_end-i):pos(t_end)) for every i satisfying pos(t_end)-pos(t_end-i) <= svs.maxlookback
The SVM weights are precomputed in m_dict_weights
int32_t mod_words_default [static, protected] |
int32_t num_words_default = {64,256,1024,4096} [static, protected] |
bool sign_words_default [static, protected] |
int32_t string_words_default [static, protected] |
int32_t word_degree_default = {3,4,5,6} [static, protected] |