SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DynProg.h
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 1999-2009 Gunnar Raetsch
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Written (W) 2008-2009 Jonas Behr
10  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
11  */
12 
13 #ifndef __CDYNPROG_H__
14 #define __CDYNPROG_H__
15 
17 #include <shogun/lib/common.h>
18 #include <shogun/base/SGObject.h>
19 #include <shogun/io/SGIO.h>
20 #include <shogun/lib/config.h>
23 #include <shogun/structure/Plif.h>
31 #include <shogun/lib/Time.h>
32 
33 #include <stdio.h>
34 #include <limits.h>
35 
36 namespace shogun
37 {
 // Forward declarations only: these names are introduced here without
 // including the headers that define them.
38  template <class T> class CSparseFeatures;
39  class CIntronList;
40  class CPlifMatrix;
41  class CSegmentLoss;
42 
 // Forward declaration of the dynamic array template.
43  template <class T> class CDynamicArray;
44 
45 //#define DYNPROG_TIMING
46 
// T_STATES is the integer type used for the T_STATES-typed offset/line/column
// arguments of the CDynProg accessors. It is 16 bits wide when USE_BIGSTATES
// is defined at compile time and 8 bits wide otherwise.
47 #ifdef USE_BIGSTATES
48 typedef uint16_t T_STATES ;
49 #else
50 typedef uint8_t T_STATES ;
51 #endif
// P_STATES is a plain pointer to T_STATES.
52 typedef T_STATES* P_STATES ;
53 
54 #ifndef DOXYGEN_SHOULD_SKIP_THIS
55 
/* Plain data record: two int32_t sizes and three raw array pointers.
 * NOTE(review): the per-field notes below are inferred from the field names
 * only; nothing in this header shows how the fields are filled or who owns
 * the pointed-to arrays - confirm against the implementation.
 */
56 struct segment_loss_struct
57 {
 /* presumably the maximum look-back length - TODO confirm */
59  int32_t maxlookback;
 /* presumably the sequence length - TODO confirm */
61  int32_t seqlen;
 /* int32_t array; presumably flags/indices of changed segments - TODO confirm */
63  int32_t *segments_changed;
 /* float64_t array; presumably a per-segment-id count - TODO confirm */
65  float64_t *num_segment_id;
 /* int32_t array; presumably a per-segment-id length - TODO confirm */
67  int32_t *length_segment_id ;
68 };
69 #endif
70 
76 class CDynProg : public CSGObject
77 {
78 public:
83  CDynProg(int32_t p_num_svms=8);
84  virtual ~CDynProg();
85 
86  // model related functions
92  void set_num_states(int32_t N);
93 
95  int32_t get_num_states();
96 
98  int32_t get_num_svms();
99 
105  void init_content_svm_value_array(const int32_t p_num_svms);
106 
114  void init_tiling_data(int32_t* probe_pos, float64_t* intensities, const int32_t num_probes);
115 
122  void precompute_tiling_plifs(CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs);
123 
128  void resize_lin_feat(int32_t num_new_feat);
134 
140 
145  void set_a(SGMatrix<float64_t> a);
146 
151  void set_a_id(SGMatrix<int32_t> a);
152 
158 
163  void init_mod_words_array(SGMatrix<int32_t> p_mod_words_array);
164 
170  bool check_svm_arrays();
171 
177 
184  int32_t get_num_positions();
185 
196 
201  void set_pos(SGVector<int32_t> pos);
202 
208  void set_orf_info(SGMatrix<int32_t> orf_info);
209 
214  void set_gene_string(SGVector<char> genestr);
215 
216 
221  void set_dict_weights(SGMatrix<float64_t> dictionary_weights);
222 
228 
235  void best_path_set_segment_ids_mask(int32_t* segment_ids, float64_t* segment_mask, int32_t m);
236 
239 
244  void set_plif_matrices(CPlifMatrix* pm);
245 
246  // best_path result retrieval functions
252 
258 
264 
265 
274  void compute_nbest_paths(int32_t max_num_signals,
275  bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences);
276 
278 
291  int32_t* my_state_seq, int32_t *my_pos_seq,
292  int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals);
293 
294  // additional best_path_trans_deriv functions
299  void set_my_state_seq(int32_t* my_state_seq);
300 
305  void set_my_pos_seq(int32_t* my_pos_seq);
306 
314  void get_path_scores(float64_t** my_scores, int32_t* seq_len);
315 
323  void get_path_losses(float64_t** my_losses, int32_t* seq_len);
324 
325 
327  inline T_STATES get_N() const
328  {
329  return m_N ;
330  }
331 
336  inline void set_q(T_STATES offset, float64_t value)
337  {
338  m_end_state_distribution_q[offset]=value;
339  }
340 
345  inline void set_p(T_STATES offset, float64_t value)
346  {
347  m_initial_state_distribution_p[offset]=value;
348  }
349 
356  inline void set_a(T_STATES line_, T_STATES column, float64_t value)
357  {
358  m_transition_matrix_a.element(line_,column)=value; // look also best_path!
359  }
360 
366  inline float64_t get_q(T_STATES offset) const
367  {
368  return m_end_state_distribution_q[offset];
369  }
370 
376  inline float64_t get_q_deriv(T_STATES offset) const
377  {
378  return m_end_state_distribution_q_deriv[offset];
379  }
380 
386  inline float64_t get_p(T_STATES offset) const
387  {
388  return m_initial_state_distribution_p[offset];
389  }
390 
396  inline float64_t get_p_deriv(T_STATES offset) const
397  {
399  }
400 
405 
412  inline float64_t* get_lin_feat(int32_t & dim1, int32_t & dim2)
413  {
414  m_lin_feat.get_array_size(dim1, dim2);
415  return m_lin_feat.get_array();
416  }
425  inline void set_lin_feat(float64_t* p_lin_feat, int32_t p_num_svms, int32_t p_seq_len)
426  {
427  m_lin_feat.set_array(p_lin_feat, p_num_svms, p_seq_len, true, true);
428  }
433  void create_word_string();
434 
437  void precompute_stop_codons();
438 
445  inline float64_t get_a(T_STATES line_, T_STATES column) const
446  {
447  return m_transition_matrix_a.element(line_, column); // look also best_path()!
448  }
449 
456  inline float64_t get_a_deriv(T_STATES line_, T_STATES column) const
457  {
458  return m_transition_matrix_a_deriv.element(line_, column); // look also best_path()!
459  }
461 
466  void set_intron_list(CIntronList* intron_list, int32_t num_plifs);
467 
470  {
471  return m_seg_loss_obj;
472  }
473 
480  void long_transition_settings(bool use_long_transitions, int32_t threshold, int32_t max_len)
481  {
482  m_long_transitions = use_long_transitions;
483  m_long_transition_threshold = threshold;
484  SG_DEBUG("ignoring max_len\n")
485  //m_long_transition_max = max_len;
486  }
487 
488 protected:
489 
490  /* helper functions */
491 
501  void lookup_content_svm_values(const int32_t from_state,
502  const int32_t to_state, const int32_t from_pos, const int32_t to_pos,
503  float64_t* svm_values, int32_t frame);
504 
512  inline void lookup_tiling_plif_values(const int32_t from_state,
513  const int32_t to_state, const int32_t len, float64_t* svm_values);
514 
519  inline int32_t find_frame(const int32_t from_state);
520 
529  inline int32_t raw_intensities_interval_query(
530  const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type);
531 
532 #ifndef DOXYGEN_SHOULD_SKIP_THIS
533 
534  struct svm_values_struct
535  {
537  int32_t maxlookback;
539  int32_t seqlen;
540 
542  int32_t* start_pos;
544  float64_t ** svm_values_unnormalized;
546  float64_t * svm_values;
548  bool *** word_used;
550  int32_t **num_unique_words;
551  };
552 #endif // DOXYGEN_SHOULD_SKIP_THIS
553 
562  bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to);
563 
565  virtual const char* get_name() const { return "DynProg"; }
566 
567 private:
568 
569  T_STATES trans_list_len;
570  T_STATES **trans_list_forward;
571  T_STATES *trans_list_forward_cnt;
572  float64_t **trans_list_forward_val;
573  int32_t **trans_list_forward_id;
574  bool mem_initialized;
575 
576 #ifdef DYNPROG_TIMING
577  CTime MyTime;
578  CTime MyTime2;
579  CTime MyTime3;
580 
581  float64_t segment_init_time;
582  float64_t segment_pos_time;
583  float64_t segment_clean_time;
584  float64_t segment_extend_time;
585  float64_t orf_time;
586  float64_t content_time;
587  float64_t content_penalty_time;
588  float64_t content_svm_values_time ;
589  float64_t content_plifs_time ;
590  float64_t svm_init_time;
591  float64_t svm_pos_time;
592  float64_t inner_loop_time;
593  float64_t inner_loop_max_time ;
594  float64_t svm_clean_time;
595  float64_t long_transition_time ;
596 #endif
597 
598 
599 protected:
604 
605  int32_t m_N;
606 
611 
615 
619 
621 
623  int32_t m_num_degrees;
625  int32_t m_num_svms;
626 
649 
651 // CDynamicArray<int32_t> m_svm_pos_start;
657  int32_t m_max_a_id;
658 
659  // input arguments
665  int32_t m_seq_len;
688  uint16_t*** m_wordstr;
705 
709 
710  // output arguments
717 
724 
729 
733 
736 
742 
746  int32_t* m_probe_pos;
752  int32_t m_num_raw_data;
753 
763  //int32_t m_long_transition_max ;
764 
768  static int32_t word_degree_default[4];
769 
773  static int32_t cum_num_words_default[5];
774 
777  static int32_t frame_plifs[3];
778 
781  static int32_t num_words_default[4];
782 
784  static int32_t mod_words_default[32];
785 
787  static bool sign_words_default[16];
788 
790  static int32_t string_words_default[16];
791 };
792 }
793 #endif

SHOGUN Machine Learning Toolbox - Documentation