SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
31 #include <shogun/lib/Time.h>
32 
33 
34 namespace shogun
35 {
36  template <class T> class CSparseFeatures;
37  class CIntronList;
38  class CPlifMatrix;
39  class CSegmentLoss;
40 
41  template <class T> class CDynamicArray;
42 
43 //#define DYNPROG_TIMING
44 
/**
 * Integer type used for state indices (T_STATES) and a pointer to it (P_STATES).
 * T_STATES is 16 bits wide when USE_BIGSTATES is defined and 8 bits wide otherwise.
 */
45 #ifdef USE_BIGSTATES
46 typedef uint16_t T_STATES ;
47 #else
48 typedef uint8_t T_STATES ;
49 #endif
50 typedef T_STATES* P_STATES ;
51 
52 #ifndef DOXYGEN_SHOULD_SKIP_THIS
53 
/**
 * Plain aggregate of bookkeeping values for segment-loss computation.
 * NOTE(review): the field descriptions below are inferred from the field names only;
 * the original doc comments are not visible here - confirm against the implementation.
 */
54 struct segment_loss_struct
55 {
/** presumably the maximum look-back distance - TODO confirm */
57  int32_t maxlookback;
/** presumably the sequence length - TODO confirm */
59  int32_t seqlen;
/** pointer to int32_t data; name suggests per-position "segments changed" flags - TODO confirm */
61  int32_t *segments_changed;
/** pointer to float64_t data; name suggests counts per segment id - TODO confirm */
63  float64_t *num_segment_id;
/** pointer to int32_t data; name suggests lengths per segment id - TODO confirm */
65  int32_t *length_segment_id ;
66 };
67 #endif
68 
74 class CDynProg : public CSGObject
75 {
76 public:
81  CDynProg(int32_t p_num_svms=8);
82  virtual ~CDynProg();
83 
84  // model related functions
90  void set_num_states(int32_t N);
91 
93  int32_t get_num_states();
94 
96  int32_t get_num_svms();
97 
103  void init_content_svm_value_array(const int32_t p_num_svms);
104 
112  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
113 
120  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
121 
126  void resize_lin_feat(int32_t num_new_feat);
132 
138 
143  void set_a(SGMatrix<float64_t> a);
144 
149  void set_a_id(SGMatrix<int32_t> a);
150 
156 
161  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
162 
168  bool check_svm_arrays();
169 
175 
182  int32_t get_num_positions();
183 
194 
199  void set_pos(SGVector<int32_t> pos);
200 
206  void set_orf_info(SGMatrix<int32_t> orf_info);
207 
212  void set_gene_string(SGVector<char> genestr);
213 
214 
219  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
220 
226 
233  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
234 
237 
242  void set_plif_matrices(CPlifMatrix* pm);
243 
244  // best_path result retrieval functions
250 
256 
262 
263 
272  void compute_nbest_paths(int32_t max_num_signals,
273  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
274 
276 
/**
 * Declaration of best_path_trans_deriv (defined in DynProg.cpp).
 * The declaration head on listing line 288 was missing, leaving an orphan
 * parameter list; it is restored here from the signature in the cross-reference.
 *
 * NOTE(review): the parameter meanings below are inferred from their names - confirm.
 * @param my_state_seq     pointer to int32_t data, presumably a state sequence
 * @param my_pos_seq       pointer to int32_t data, presumably a position sequence
 * @param my_seq_len       presumably the length of the two sequences above
 * @param seq_array        read-only pointer to float64_t data
 * @param max_num_signals  presumably the maximal number of signals
 */
288  void best_path_trans_deriv(
289  int32_t* my_state_seq, int32_t *my_pos_seq,
290  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
291 
292  // additional best_path_trans_deriv functions
297  void set_my_state_seq(int32_t* my_state_seq);
298 
303  void set_my_pos_seq(int32_t* my_pos_seq);
304 
312  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
313 
321  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
322 
323 
/**
 * Access function for the number of states N.
 *
 * m_N is stored as int32_t but returned as T_STATES (uint8_t, or uint16_t with
 * USE_BIGSTATES), so the value is implicitly narrowed on return.
 *
 * @return m_N converted to T_STATES
 */
325  inline T_STATES get_N() const
326  {
327  return m_N ;
328  }
329 
/**
 * Stores value in the end-state distribution (m_end_state_distribution_q) at index offset.
 *
 * @param offset index of the entry to write
 * @param value  value to store
 */
334  inline void set_q(T_STATES offset, float64_t value)
335  {
336  m_end_state_distribution_q[offset]=value;
337  }
338 
/**
 * Stores value in the initial state distribution (m_initial_state_distribution_p) at index offset.
 *
 * @param offset index of the entry to write
 * @param value  value to store
 */
343  inline void set_p(T_STATES offset, float64_t value)
344  {
345  m_initial_state_distribution_p[offset]=value;
346  }
347 
/**
 * Stores value in the transition matrix (m_transition_matrix_a) at element (line_, column).
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @param value  value to store
 */
354  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
355  {
356  m_transition_matrix_a.element(line_,column)=value; // see also best_path
357  }
358 
/**
 * Reads the entry at index offset of the end-state distribution (m_end_state_distribution_q).
 *
 * @param offset index of the entry to read
 * @return the stored value
 */
364  inline float64_t get_q(T_STATES offset) const
365  {
366  return m_end_state_distribution_q[offset];
367  }
368 
/**
 * Reads the entry at index offset of m_end_state_distribution_q_deriv.
 * NOTE(review): presumably the derivative w.r.t. the end-state distribution - confirm.
 *
 * @param offset index of the entry to read
 * @return the stored value
 */
374  inline float64_t get_q_deriv(T_STATES offset) const
375  {
376  return m_end_state_distribution_q_deriv[offset];
377  }
378 
/**
 * Reads the entry at index offset of the initial state distribution (m_initial_state_distribution_p).
 *
 * @param offset index of the entry to read
 * @return the stored value
 */
384  inline float64_t get_p(T_STATES offset) const
385  {
386  return m_initial_state_distribution_p[offset];
387  }
388 
/**
 * Reads the entry at index offset of m_initial_state_distribution_p_deriv.
 * The return statement (listing line 396) was missing, which left a non-void
 * function without a return value; it is restored here to mirror get_q_deriv().
 * NOTE(review): presumably the derivative w.r.t. the initial state distribution - confirm.
 *
 * @param offset index of the entry to read
 * @return the stored value
 */
394  inline float64_t get_p_deriv(T_STATES offset) const
395  {
396  return m_initial_state_distribution_p_deriv[offset];
397  }
398 
403 
/**
 * Queries m_lin_feat for its array size and returns its underlying array pointer.
 *
 * @param dim1 passed by reference to get_array_size(); presumably receives the first dimension - TODO confirm
 * @param dim2 passed by reference to get_array_size(); presumably receives the second dimension - TODO confirm
 * @return the pointer returned by m_lin_feat.get_array()
 *         (NOTE(review): looks like a non-owning view of internal storage - confirm)
 */
410  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
411  {
412  m_lin_feat.get_array_size(dim1, dim2);
413  return m_lin_feat.get_array();
414  }
/**
 * Hands p_lin_feat to m_lin_feat via set_array() with dimensions (p_num_svms, p_seq_len).
 *
 * @param p_lin_feat pointer to the float64_t data
 * @param p_num_svms first dimension passed to set_array()
 * @param p_seq_len  second dimension passed to set_array()
 */
423  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
424  {
// NOTE(review): the meaning of the two trailing `true` flags is not visible here - check CDynamicArray::set_array
425  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
426  }
431  void create_word_string();
432 
435  void precompute_stop_codons();
436 
/**
 * Reads element (line_, column) of the transition matrix (m_transition_matrix_a).
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return the stored value
 */
443  inline float64_t get_a(T_STATES line_, T_STATES column) const
444  {
445  return m_transition_matrix_a.element(line_, column); // see also best_path()
446  }
447 
/**
 * Reads element (line_, column) of m_transition_matrix_a_deriv.
 * NOTE(review): presumably the derivative w.r.t. the transition matrix - confirm.
 *
 * @param line_  first index passed to element()
 * @param column second index passed to element()
 * @return the stored value
 */
454  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
455  {
456  return m_transition_matrix_a_deriv.element(line_, column); // see also best_path()
457  }
459 
464  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
465 
/**
 * Returns the stored segment loss object pointer (m_seg_loss_obj).
 * The signature line (listing line 467) was missing, leaving an orphan body;
 * it is restored here from the cross-reference entry for this method.
 *
 * @return m_seg_loss_obj as stored; this accessor performs no other action
 */
467  inline CSegmentLoss* get_segment_loss_object()
468  {
469  return m_seg_loss_obj;
470  }
471 
/**
 * Stores the long-transition settings.
 *
 * @param use_long_transitions stored in m_long_transitions
 * @param threshold            stored in m_long_transition_threshold
 * @param max_len              currently ignored: it is not stored, only a debug message is emitted
 */
478  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
479  {
480  m_long_transitions = use_long_transitions;
481  m_long_transition_threshold = threshold;
482  SG_DEBUG("ignoring max_len\n")
// the assignment below is disabled, matching the commented-out m_long_transition_max member
483  //m_long_transition_max = max_len;
484  }
485 
486 protected:
487 
488  /* helper functions */
489 
499  void lookup_content_svm_values(const int32_t from_state,
500  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
501  float64_t* svm_values, int32_t frame);
502 
510  inline void lookup_tiling_plif_values(const int32_t from_state,
511  const int32_t to_state, const int32_t len, float64_t* svm_values);
512 
517  inline int32_t find_frame(const int32_t from_state);
518 
527  inline int32_t raw_intensities_interval_query(
528  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
529 
530 #ifndef DOXYGEN_SHOULD_SKIP_THIS
531 
/**
 * Plain aggregate of buffers and sizes related to SVM values.
 * NOTE(review): the field descriptions below are inferred from the field names only;
 * the original doc comments are not visible here - confirm against the implementation.
 */
532  struct svm_values_struct
533  {
/** presumably the maximum look-back distance - TODO confirm */
535  int32_t maxlookback;
/** presumably the sequence length - TODO confirm */
537  int32_t seqlen;
538 
/** pointer to int32_t data; name suggests start positions - TODO confirm */
540  int32_t* start_pos;
/** two-level pointer to float64_t data; name suggests unnormalized SVM values - TODO confirm */
542  float64_t ** svm_values_unnormalized;
/** pointer to float64_t data; name suggests SVM values - TODO confirm */
544  float64_t * svm_values;
/** three-level pointer to bool data; name suggests "word used" flags - TODO confirm */
546  bool *** word_used;
/** two-level pointer to int32_t data; name suggests counts of unique words - TODO confirm */
548  int32_t **num_unique_words;
549  };
550 #endif // DOXYGEN_SHOULD_SKIP_THIS
551 
560  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
561 
/** @return the string literal "DynProg" as the name of this object */
563  virtual const char* get_name() const { return "DynProg"; }
564 
565 private:
566 
567  T_STATES trans_list_len;
568  T_STATES **trans_list_forward;
569  T_STATES *trans_list_forward_cnt;
570  float64_t **trans_list_forward_val;
571  int32_t **trans_list_forward_id;
572  bool mem_initialized;
573 
574 #ifdef DYNPROG_TIMING
575  CTime MyTime;
576  CTime MyTime2;
577  CTime MyTime3;
578 
579  float64_t segment_init_time;
580  float64_t segment_pos_time;
581  float64_t segment_clean_time;
582  float64_t segment_extend_time;
583  float64_t orf_time;
584  float64_t content_time;
585  float64_t content_penalty_time;
586  float64_t content_svm_values_time ;
587  float64_t content_plifs_time ;
588  float64_t svm_init_time;
589  float64_t svm_pos_time;
590  float64_t inner_loop_time;
591  float64_t inner_loop_max_time ;
592  float64_t svm_clean_time;
593  float64_t long_transition_time ;
594 #endif
595 
596 
597 protected:
602  int32_t m_N;
604 
609 
613 
617 
619 
621  int32_t m_num_degrees;
623  int32_t m_num_svms;
624 
647 
649 // CDynamicArray<int32_t> m_svm_pos_start;
655  int32_t m_max_a_id;
656 
657  // input arguments
663  int32_t m_seq_len;
686  uint16_t*** m_wordstr;
703 
707 
708  // output arguments
715 
722 
727 
731 
734 
740 
744  int32_t* m_probe_pos;
750  int32_t m_num_raw_data;
751 
761  //int32_t m_long_transition_max ;
762 
766  static int32_t word_degree_default[4];
767 
771  static int32_t cum_num_words_default[5];
772 
775  static int32_t frame_plifs[3];
776 
779  static int32_t num_words_default[4];
780 
782  static int32_t mod_words_default[32];
783 
785  static bool sign_words_default[16];
786 
788  static int32_t string_words_default[16];
789 };
790 }
791 #endif
Class Time that implements a stopwatch based on either cpu time or wall clock time.
Definition: Time.h:47
CDynamicArray< float64_t > m_end_state_distribution_q_deriv
Definition: DynProg.h:616
bool m_svm_arrays_clean
Definition: DynProg.h:653
CDynamicArray< float64_t > m_segment_loss
Definition: DynProg.h:690
CPlifMatrix * m_plif_matrices
Definition: DynProg.h:721
float64_t get_a(T_STATES line_, T_STATES column) const
Definition: DynProg.h:443
CDynamicArray< int32_t > m_positions
Definition: DynProg.h:714
void best_path_trans_deriv(int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
Definition: DynProg.cpp:2052
T_STATES * P_STATES
Definition: HMM.h:64
uint16_t *** m_wordstr
Definition: DynProg.h:686
CDynamicArray< float64_t > m_dict_weights
Definition: DynProg.h:688
static int32_t cum_num_words_default[5]
Definition: DynProg.h:771
void set_lin_feat(float64_t *p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
Definition: DynProg.h:423
virtual ~CDynProg()
Definition: DynProg.cpp:142
CDynamicArray< int32_t > m_segment_ids
Definition: DynProg.h:692
void set_dict_weights(SGMatrix< float64_t > dictionary_weights)
Definition: DynProg.cpp:779
void set_my_state_seq(int32_t *my_state_seq)
Definition: DynProg.cpp:763
void set_plif_matrices(CPlifMatrix *pm)
Definition: DynProg.cpp:746
void create_word_string()
Definition: DynProg.cpp:361
int32_t m_N
number of states
Definition: DynProg.h:603
T_STATES get_N() const
access function for number of states N
Definition: DynProg.h:325
void set_observation_matrix(SGNDArray< float64_t > seq)
Definition: DynProg.cpp:657
float64_t get_a_deriv(T_STATES line_, T_STATES column) const
Definition: DynProg.h:454
bool m_long_transitions
Definition: DynProg.h:753
int32_t m_max_a_id
Definition: DynProg.h:655
void set_gene_string(SGVector< char > genestr)
Definition: DynProg.cpp:755
void set_orf_info(SGMatrix< int32_t > orf_info)
Definition: DynProg.cpp:723
void get_path_scores(float64_t **my_scores, int32_t *seq_len)
Definition: DynProg.cpp:863
bool check_svm_arrays()
Definition: DynProg.cpp:601
CDynamicArray< int32_t > m_transition_matrix_a_id
transition matrix
Definition: DynProg.h:606
CDynamicArray< int32_t > m_word_degree
Definition: DynProg.h:626
int32_t * m_cum_num_words_array
Definition: DynProg.h:630
SGVector< float64_t > get_scores()
Definition: DynProg.cpp:835
T * get_array() const
Definition: DynamicArray.h:408
float64_t get_q(T_STATES offset) const
Definition: DynProg.h:364
int32_t * m_mod_words_array
Definition: DynProg.h:638
int32_t get_num_svms()
Definition: DynProg.cpp:191
CDynamicArray< float64_t > m_lin_feat
Definition: DynProg.h:739
class SegmentLoss
Definition: SegmentLoss.h:24
float64_t get_p(T_STATES offset) const
Definition: DynProg.h:384
int32_t raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type)
Definition: DynProg.cpp:2456
int32_t m_num_intron_plifs
Definition: DynProg.h:733
void best_path_set_segment_ids_mask(int32_t *segment_ids, float64_t *segment_mask, int32_t m)
Definition: DynProg.cpp:815
void set_pos(SGVector< int32_t > pos)
Definition: DynProg.cpp:717
int32_t m_seq_len
Definition: DynProg.h:663
CDynamicArray< float64_t > m_initial_state_distribution_p_deriv
Definition: DynProg.h:612
int32_t get_num_positions()
Definition: DynProg.cpp:679
CDynProg(int32_t p_num_svms=8)
Definition: DynProg.cpp:46
int32_t m_num_degrees
Definition: DynProg.h:621
CSparseFeatures< float64_t > * m_seq_sparse1
Definition: DynProg.h:717
CSparseFeatures< float64_t > * m_seq_sparse2
Definition: DynProg.h:719
class Plif
Definition: Plif.h:40
int32_t * m_num_probes_cum
Definition: DynProg.h:746
void precompute_content_values()
Definition: DynProg.cpp:392
int32_t * m_num_lin_feat_plifs_cum
Definition: DynProg.h:748
CDynamicArray< float64_t > m_initial_state_distribution_p
initial distribution of states
Definition: DynProg.h:611
CDynamicArray< int32_t > m_mod_words
Definition: DynProg.h:636
class IntronList
Definition: IntronList.h:22
int32_t find_frame(const int32_t from_state)
void init_mod_words_array(SGMatrix< int32_t > p_mod_words_array)
Definition: DynProg.cpp:579
void set_a(SGMatrix< float64_t > a)
Definition: DynProg.cpp:457
CDynamicArray< float64_t > m_transition_matrix_a_deriv
Definition: DynProg.h:608
void set_p(T_STATES offset, float64_t value)
Definition: DynProg.h:343
void set_intron_list(CIntronList *intron_list, int32_t num_plifs)
Definition: DynProg.cpp:2535
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
void set_q_vector(SGVector< float64_t > q)
Definition: DynProg.cpp:450
CDynamicArray< int32_t > m_num_unique_words
Definition: DynProg.h:651
CDynamicArray< float64_t > m_transition_matrix_a
Definition: DynProg.h:607
static int32_t mod_words_default[32]
Definition: DynProg.h:782
void resize_lin_feat(int32_t num_new_feat)
Definition: DynProg.cpp:279
void precompute_stop_codons()
Definition: DynProg.cpp:196
CDynamicArray< bool > m_genestr_stop
Definition: DynProg.h:726
void lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame)
Definition: DynProg.cpp:2482
CDynamicArray< int32_t > m_orf_info
Definition: DynProg.h:665
double float64_t
Definition: common.h:50
CDynamicArray< float64_t > m_segment_sum_weights
Definition: DynProg.h:667
SGMatrix< int32_t > get_positions()
Definition: DynProg.cpp:853
int32_t * m_string_words_array
Definition: DynProg.h:646
CDynamicArray< int32_t > m_my_pos_seq
Definition: DynProg.h:698
SGMatrix< int32_t > get_states()
Definition: DynProg.cpp:843
CDynamicArray< int32_t > m_states
Definition: DynProg.h:712
static int32_t frame_plifs[3]
Definition: DynProg.h:775
CDynamicArray< int32_t > m_cum_num_words
Definition: DynProg.h:628
void best_path_set_segment_loss(SGMatrix< float64_t > segment_loss)
Definition: DynProg.cpp:798
CDynamicArray< int32_t > m_string_words
Definition: DynProg.h:644
CDynamicArray< int32_t > m_pos
Definition: DynProg.h:661
void set_content_type_array(SGMatrix< float64_t > seg_path)
Definition: DynProg.cpp:684
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an array.
void precompute_tiling_plifs(CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs)
Definition: DynProg.cpp:312
int32_t m_long_transition_threshold
Definition: DynProg.h:756
float64_t get_q_deriv(T_STATES offset) const
Definition: DynProg.h:374
CDynamicArray< float64_t > m_end_state_distribution_q
distribution of end-states
Definition: DynProg.h:615
float64_t * get_lin_feat(int32_t &dim1, int32_t &dim2)
Definition: DynProg.h:410
void set_a(T_STATES line_, T_STATES column, float64_t value)
Definition: DynProg.h:354
void set_a_trans_matrix(SGMatrix< float64_t > a_trans)
Definition: DynProg.cpp:478
float64_t * m_raw_intensities
Definition: DynProg.h:742
void set_q(T_STATES offset, float64_t value)
Definition: DynProg.h:334
CDynamicArray< float64_t > m_my_losses
Definition: DynProg.h:702
CDynamicArray< float64_t > m_segment_mask
Definition: DynProg.h:694
CDynamicArray< char > m_genestr
Definition: DynProg.h:671
void set_my_pos_seq(int32_t *my_pos_seq)
Definition: DynProg.cpp:771
CDynamicArray< float64_t > m_scores
Definition: DynProg.h:710
uint8_t T_STATES
Definition: HMM.h:62
void set_array(T *p_array, int32_t p_num_elements, int32_t array_size)
Definition: DynamicArray.h:419
float64_t get_p_deriv(T_STATES offset) const
Definition: DynProg.h:394
#define SG_DEBUG(...)
Definition: SGIO.h:107
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
CDynamicArray< float64_t > m_observation_matrix
Definition: DynProg.h:659
CDynamicArray< bool > m_sign_words
Definition: DynProg.h:640
CDynamicArray< int32_t > m_num_words
Definition: DynProg.h:632
int32_t get_num_states()
Definition: DynProg.cpp:234
CDynamicObjectArray m_plif_list
Definition: DynProg.h:669
CDynamicArray< float64_t > m_my_scores
Definition: DynProg.h:700
int32_t * m_probe_pos
Definition: DynProg.h:744
Dynamic Programming Class.
Definition: DynProg.h:74
int32_t * m_num_words_array
Definition: DynProg.h:634
void set_p_vector(SGVector< float64_t > p)
Definition: DynProg.cpp:442
void get_path_losses(float64_t **my_losses, int32_t *seq_len)
Definition: DynProg.cpp:877
CIntronList * m_intron_list
Definition: DynProg.h:730
int32_t m_num_raw_data
Definition: DynProg.h:750
const T & element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
Definition: DynamicArray.h:224
void init_tiling_data(int32_t *probe_pos, float64_t *intensities, const int32_t num_probes)
Definition: DynProg.cpp:239
virtual const char * get_name() const
Definition: DynProg.h:563
int32_t m_num_svms
Definition: DynProg.h:623
static bool sign_words_default[16]
Definition: DynProg.h:785
static int32_t word_degree_default[4]
Definition: DynProg.h:766
void set_num_states(int32_t N)
Definition: DynProg.cpp:219
void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
Definition: DynProg.h:478
CSegmentLoss * m_seg_loss_obj
Definition: DynProg.h:706
CSegmentLoss * get_segment_loss_object()
Definition: DynProg.h:467
bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to)
Definition: DynProg.cpp:893
static int32_t string_words_default[16]
Definition: DynProg.h:788
static int32_t num_words_default[4]
Definition: DynProg.h:779
CDynamicArray< int32_t > m_my_state_seq
Definition: DynProg.h:696
void init_content_svm_value_array(const int32_t p_num_svms)
Definition: DynProg.cpp:269
void compute_nbest_paths(int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences)
Definition: DynProg.cpp:944
void lookup_tiling_plif_values(const int32_t from_state, const int32_t to_state, const int32_t len, float64_t *svm_values)
store plif arrays for all transitions in the model
Definition: PlifMatrix.h:31
bool * m_sign_words_array
Definition: DynProg.h:642
void set_sparse_features(CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2)
Definition: DynProg.cpp:732
void set_a_id(SGMatrix< int32_t > a)
Definition: DynProg.cpp:465

SHOGUN Machine Learning Toolbox - Documentation