42 false,
false,
false,
false,
false,
false,
false,
false} ;
47 :
CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
48 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
49 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
50 m_end_state_distribution_q_deriv(1),
55 m_word_degree(word_degree_default, m_num_degrees, true, true),
56 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
57 m_cum_num_words_array(m_cum_num_words.get_array()),
58 m_num_words(num_words_default, m_num_degrees, true, true),
59 m_num_words_array(m_num_words.get_array()),
60 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
61 m_mod_words_array(m_mod_words.get_array()),
62 m_sign_words(sign_words_default, m_num_svms, true, true),
63 m_sign_words_array(m_sign_words.get_array()),
64 m_string_words(string_words_default, m_num_svms, true, true),
65 m_string_words_array(m_string_words.get_array()),
67 m_num_unique_words(m_num_degrees),
68 m_svm_arrays_clean(true),
70 m_max_a_id(0), m_observation_matrix(1,1,1),
75 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
88 m_plif_matrices(NULL),
92 m_num_intron_plifs(0),
94 m_raw_intensities(NULL),
96 m_num_probes_cum(NULL),
97 m_num_lin_feat_plifs_cum(NULL),
100 m_long_transitions(true),
101 m_long_transition_threshold(1000)
103 trans_list_forward = NULL ;
104 trans_list_forward_cnt = NULL ;
105 trans_list_forward_val = NULL ;
106 trans_list_forward_id = NULL ;
109 mem_initialized = true ;
121 #ifdef ARRAY_STATISTICS
144 if (trans_list_forward_cnt)
145 SG_FREE(trans_list_forward_cnt);
146 if (trans_list_forward)
148 for (int32_t i=0; i<trans_list_len; i++)
150 if (trans_list_forward[i])
151 SG_FREE(trans_list_forward[i]);
153 SG_FREE(trans_list_forward);
155 if (trans_list_forward_val)
157 for (int32_t i=0; i<trans_list_len; i++)
159 if (trans_list_forward_val[i])
160 SG_FREE(trans_list_forward_val[i]);
162 SG_FREE(trans_list_forward_val);
164 if (trans_list_forward_id)
166 for (int32_t i=0; i<trans_list_len; i++)
168 if (trans_list_forward_id[i])
169 SG_FREE(trans_list_forward_id[i]);
171 SG_FREE(trans_list_forward_id);
204 for (int32_t i=0; i<length-2; i++)
240 int32_t* probe_pos,
float64_t* intensities,
const int32_t num_probes)
249 if (m_num_raw_data==1){
250 memcpy(tmp_probe_pos, probe_pos, num_probes*
sizeof(int32_t));
251 memcpy(tmp_raw_intensities, intensities, num_probes*
sizeof(
float64_t));
256 memcpy(tmp_probe_pos+
m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*
sizeof(int32_t));
274 for (
int s=0; s<p_num_svms; s++)
290 memset(tmp, 0, (dim1+num_new_feat)*dim2*
sizeof(
float64_t)) ;
293 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
313 CPlif** PEN,
const int32_t* tiling_plif_ids,
const int32_t num_tiling_plifs)
320 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
321 for (int32_t i=0; i<num_tiling_plifs; i++)
324 CPlif * plif = PEN[tiling_plif_ids[i]];
336 for (int32_t pos_idx=0;pos_idx<
m_seq_len;pos_idx++)
340 for (int32_t i=0; i<num_tiling_plifs; i++)
343 CPlif * plif = PEN[tiling_plif_ids[i]];
353 for (int32_t i=0; i<num_tiling_plifs; i++)
357 SG_FREE(tiling_plif);
358 SG_FREE(tiling_rows);
373 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
374 for (int32_t i=0; i<genestr_len; i++)
399 int32_t from_pos =
m_pos[p];
400 int32_t to_pos =
m_pos[p+1];
408 my_svm_values_unnormalized[s]=0.0;
410 for (int32_t i=from_pos; i<to_pos; i++)
428 if (prev<-1e20 || prev>1e20)
430 SG_ERROR(
"initialization missing (%i, %i, %f)\n", s, p, prev)
435 SG_FREE(my_svm_values_unnormalized);
445 SG_ERROR(
"length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.
vlen,
m_N)
453 SG_ERROR(
"length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.
vlen,
m_N)
471 for (int32_t i=0; i<
m_N; i++)
473 for (int32_t j=0; j<
m_N; j++)
485 if (!((num_cols==3) || (num_cols==4)))
486 SG_ERROR(
"!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols)
488 SG_FREE(trans_list_forward);
489 SG_FREE(trans_list_forward_cnt);
490 SG_FREE(trans_list_forward_val);
491 SG_FREE(trans_list_forward_id);
493 trans_list_forward = NULL ;
494 trans_list_forward_cnt = NULL ;
495 trans_list_forward_val = NULL ;
501 mem_initialized = true ;
503 trans_list_forward_cnt=NULL ;
504 trans_list_len =
m_N ;
508 trans_list_forward_id = SG_MALLOC(int32_t*,
m_N);
511 for (int32_t j=0; j<
m_N; j++)
513 int32_t old_start_idx=start_idx;
515 while (start_idx<num_trans && a_trans.
matrix[start_idx+num_trans]==j)
519 if (start_idx>1 && start_idx<num_trans)
523 if (start_idx>1 && start_idx<num_trans)
526 int32_t len=start_idx-old_start_idx;
529 trans_list_forward_cnt[j] = 0 ;
533 trans_list_forward[j] = SG_MALLOC(
T_STATES, len);
534 trans_list_forward_val[j] = SG_MALLOC(
float64_t, len);
535 trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
539 trans_list_forward[j] = NULL;
540 trans_list_forward_val[j] = NULL;
541 trans_list_forward_id[j] = NULL;
545 for (int32_t i=0; i<num_trans; i++)
547 int32_t from_state = (int32_t)a_trans.
matrix[i] ;
548 int32_t to_state = (int32_t)a_trans.
matrix[i+num_trans] ;
552 id = (int32_t)a_trans.
matrix[i+num_trans*3] ;
555 ASSERT(to_state>=0 && to_state<m_N)
556 ASSERT(from_state>=0 && from_state<m_N)
558 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
559 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
560 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
561 trans_list_forward_cnt[to_state]++ ;
568 for (int32_t i=0; i<
m_N; i++)
569 for (int32_t j=0; j<
m_N; j++)
634 SG_WARNING(
"SVM array: word_degree.get_dim1()!=m_num_degrees")
636 SG_WARNING(
"SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1")
638 SG_WARNING(
"SVM array: m_num_words.get_dim1()==m_num_degrees")
642 SG_WARNING(
"SVM array: m_num_unique_words.get_dim1()!=m_num_degrees")
644 SG_WARNING(
"SVM array: m_mod_words.get_dim1()!=num_svms")
646 SG_WARNING(
"SVM array: m_mod_words.get_dim2()!=2")
648 SG_WARNING(
"SVM array: m_sign_words.get_dim1()!=num_svms")
650 SG_WARNING(
"SVM array: m_string_words.get_dim1()!=num_svms")
660 SG_ERROR(
"Expected 3-dimensional Matrix\n")
662 int32_t N=seq.
dims[0];
663 int32_t cand_pos=seq.
dims[1];
664 int32_t max_num_features=seq.
dims[2];
689 if (seg_path.
matrix!=NULL)
691 int32_t *segment_ids = SG_MALLOC(int32_t,
m_seq_len);
695 segment_ids[i] = (int32_t)seg_path.
matrix[2*i] ;
696 segment_mask[i] = seg_path.
matrix[2*i+1] ;
699 SG_FREE(segment_ids);
700 SG_FREE(segment_mask);
704 int32_t *izeros = SG_MALLOC(int32_t,
m_seq_len);
734 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
735 SG_ERROR(
"Sparse features must either both be NULL or both NON-NULL\n")
783 SG_ERROR(
"m_dict_weights array does not match num_svms=%i!=%i\n",
804 SG_ERROR(
"segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n)
816 int32_t* segment_ids,
float64_t* segment_mask, int32_t m)
822 for (int32_t i=1;i<m;i++)
869 int32_t sz =
sizeof(
float64_t)*(*seq_len);
871 *scores = SG_MALLOC(
float64_t, *seq_len);
883 int32_t sz =
sizeof(
float64_t)*(*seq_len);
885 *losses = SG_MALLOC(
float64_t, *seq_len);
894 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
897 #ifdef DYNPROG_TIMING_DETAIL
906 int32_t orf_target = orf_to-orf_from ;
907 if (orf_target<0) orf_target+=3 ;
917 #ifdef DYNPROG_TIMING_DETAIL
919 orf_time += MyTime.time_diff_sec() ;
924 for (; pos>=start; pos-=3)
927 #ifdef DYNPROG_TIMING_DETAIL
929 orf_time += MyTime.time_diff_sec() ;
937 #ifdef DYNPROG_TIMING_DETAIL
939 orf_time += MyTime.time_diff_sec() ;
945 int16_t nbest,
bool with_loss,
bool with_multiple_sequences)
960 for (int32_t i=0; i<nbest; i++)
978 #ifdef DYNPROG_TIMING
979 segment_init_time = 0.0 ;
980 segment_pos_time = 0.0 ;
981 segment_extend_time = 0.0 ;
982 segment_clean_time = 0.0 ;
984 svm_init_time = 0.0 ;
986 svm_clean_time = 0.0 ;
987 inner_loop_time = 0.0 ;
988 content_svm_values_time = 0.0 ;
989 content_plifs_time = 0.0 ;
990 inner_loop_max_time = 0.0 ;
991 long_transition_time = 0.0 ;
1002 #ifdef DYNPROG_DEBUG
1011 int32_t max_look_back = 1000 ;
1012 bool use_svm = false ;
1029 #ifdef DYNPROG_DEBUG
1033 SG_PRINT(
"m_num_lin_feat_plifs_cum: ")
1049 if (seq_array!=NULL)
1061 SG_PRINT(
"using sparse seq_array\n")
1065 ASSERT(max_num_signals==2)
1068 for (int32_t i=0; i<
m_N; i++)
1072 for (int32_t i=0; i<
m_N; i++)
1074 for (int32_t k=0; k<max_num_signals; k++)
1076 if ((PEN_state_signals.
element(i,k)==NULL) && (k==0))
1079 if (seq_input!=NULL)
1090 if (PEN_state_signals.
element(i,k)!=NULL)
1092 if (seq_input!=NULL)
1133 long_transition_content_start_position.
set_array_name(
"long_transition_content_start_position");
1134 #ifdef DYNPROG_DEBUG
1136 long_transition_content_end_position.
set_array_name(
"long_transition_content_end_position");
1139 long_transition_content_start.
set_array_name(
"long_transition_content_start");
1142 long_transition_content_scores.
set_array_name(
"long_transition_content_scores");
1143 #ifdef DYNPROG_DEBUG
1146 long_transition_content_scores_pen.
set_array_name(
"long_transition_content_scores_pen");
1149 long_transition_content_scores_prev.
set_array_name(
"long_transition_content_scores_prev");
1152 long_transition_content_scores_elem.
set_array_name(
"long_transition_content_scores_elem");
1155 long_transition_content_scores_loss.
set_array_name(
"long_transition_content_scores_loss");
1159 SG_ERROR(
"Long transitions are not supported for nbest!=1")
1160 long_transitions = false ;
1163 #ifdef DYNPROG_DEBUG
1164 long_transition_content_scores_pen.
set_const(0) ;
1165 long_transition_content_scores_elem.
set_const(0) ;
1166 long_transition_content_scores_prev.
set_const(0) ;
1169 long_transition_content_scores_loss.
set_const(0) ;
1170 long_transition_content_start.
set_const(0) ;
1171 long_transition_content_start_position.
set_const(0) ;
1172 #ifdef DYNPROG_DEBUG
1173 long_transition_content_end_position.
set_const(0) ;
1189 for (int32_t i=0; i<
m_N; i++)
1190 for (int32_t j=0; j<
m_N; j++)
1196 for (int32_t j=0; j<
m_N; j++)
1199 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1200 const T_STATES *elem_list = trans_list_forward[j] ;
1202 for (int32_t i=0; i<num_elem; i++)
1209 if (long_transitions)
1239 if (long_transitions)
1245 int32_t num_long_transitions = 0 ;
1246 for (int32_t i=0; i<
m_N; i++)
1247 for (int32_t j=0; j<
m_N; j++)
1250 num_long_transitions++ ;
1253 if (long_transitions)
1265 SG_DEBUG(
"Using %i long transitions\n", num_long_transitions)
1272 SG_DEBUG(
"maxlook: %d m_N: %d nbest: %d \n", max_look_back,
m_N, nbest)
1273 const int32_t look_back_buflen = (max_look_back*
m_N+1)*nbest ;
1274 SG_DEBUG(
"look_back_buflen=%i\n", look_back_buflen)
1321 memset(fixedtempvv, 0, look_back_buflen*
sizeof(
float64_t)) ;
1322 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
1323 memset(fixedtempii, 0, look_back_buflen*
sizeof(int32_t)) ;
1369 #ifdef USE_TMP_ARRAYCLASS
1370 fixedtempvv.set_array_name(
"fixedtempvv") ;
1371 fixedtempii.set_array_name(
"fixedtempvv") ;
1382 #ifdef DYNPROG_DEBUG
1395 PEN.display_size() ;
1396 PEN_state_signals.display_size() ;
1409 #ifdef USE_TMP_ARRAYCLASS
1410 fixedtempvv.display_size() ;
1411 fixedtempii.display_size() ;
1422 #endif //DYNPROG_DEBUG
1447 for (int16_t k=1; k<nbest; k++)
1449 int32_t dim1, dim2, dim3 ;
1493 for (int16_t k=0; k<nbest; k++)
1495 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = seq.
element(j,t) ;
1504 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1505 const T_STATES *elem_list = trans_list_forward[j] ;
1506 const float64_t *elem_val = trans_list_forward_val[j] ;
1507 const int32_t *elem_id = trans_list_forward_id[j] ;
1509 int32_t fixed_list_len = 0 ;
1511 int32_t fixedtempii_ = 0 ;
1512 bool fixedtemplong = false ;
1514 for (int32_t i=0; i<num_elem; i++)
1531 int32_t look_back_ = look_back.
element(j, ii) ;
1535 if((orf_from!=-1)!=(orf_to!=-1))
1536 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1537 ASSERT((orf_from!=-1)==(orf_to!=-1))
1539 int32_t orf_target = -1 ;
1542 orf_target=orf_to-orf_from ;
1545 ASSERT(orf_target>=0 && orf_target<3)
1548 int32_t orf_last_pos =
m_pos[t] ;
1549 #ifdef DYNPROG_TIMING
1552 int32_t num_ok_pos = 0 ;
1554 for (int32_t ts=t-1; ts>=0 &&
m_pos[t]-
m_pos[ts]<=look_back_; ts--)
1568 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
1569 ok=(!use_orf) ||
extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
1587 int32_t frame = orf_from;
1593 #ifdef DYNPROG_TIMING_DETAIL
1596 pen_val = penalty->
lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
1598 #ifdef DYNPROG_TIMING_DETAIL
1600 content_plifs_time += MyTime.time_diff_sec() ;
1604 #ifdef DYNPROG_TIMING_DETAIL
1613 val += segment_loss ;
1615 float64_t mval = -(val + delta.
element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
1617 if (mval<fixedtempvv_)
1619 fixedtempvv_ = mval ;
1620 fixedtempii_ = ii + ts*
m_N;
1621 fixed_list_len = 1 ;
1622 fixedtemplong = false ;
1627 for (int16_t diff=0; diff<nbest; diff++)
1632 val += segment_loss ;
1634 float64_t mval = -(val + delta.
element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
1640 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
1642 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
1644 fixedtempvv[fixed_list_len] = mval ;
1645 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
1650 int32_t addhere = fixed_list_len;
1651 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
1655 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
1657 fixedtempvv[jj] = fixedtempvv[jj-1];
1658 fixedtempii[jj] = fixedtempii[jj-1];
1661 fixedtempvv[addhere] = mval;
1662 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
1664 if (fixed_list_len < nbest)
1670 #ifdef DYNPROG_TIMING_DETAIL
1672 inner_loop_max_time += MyTime.time_diff_sec() ;
1676 #ifdef DYNPROG_TIMING
1678 inner_loop_time += MyTime3.time_diff_sec() ;
1681 for (int32_t i=0; i<num_elem; i++)
1698 int32_t look_back_ = look_back.
element(j, ii) ;
1703 if((orf_from!=-1)!=(orf_to!=-1))
1704 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1705 ASSERT((orf_from!=-1)==(orf_to!=-1))
1707 int32_t orf_target = -1 ;
1710 orf_target=orf_to-orf_from ;
1713 ASSERT(orf_target>=0 && orf_target<3)
1719 #ifdef DYNPROG_TIMING
1730 #ifdef DYNPROG_TIMING
1739 int32_t start = long_transition_content_start.
get_element(ii, j) ;
1740 int32_t end_5p_part = start ;
1744 while (end_5p_part<=t &&
m_pos[end_5p_part+1]-
m_pos[start_5p_part]<=m_long_transition_threshold)
1747 ASSERT(
m_pos[end_5p_part+1]-
m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t)
1748 ASSERT(
m_pos[end_5p_part]-
m_pos[start_5p_part] <= m_long_transition_threshold)
1765 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
1774 mval_trans -= segment_loss_part1 ;
1785 long_transition_content_start_position.
set_element(0, ii, j) ;
1787 long_transition_content_scores_loss.
set_element(0.0, ii, j) ;
1788 #ifdef DYNPROG_DEBUG
1789 long_transition_content_scores_pen.
set_element(0.0, ii, j) ;
1790 long_transition_content_scores_elem.
set_element(0.0, ii, j) ;
1791 long_transition_content_scores_prev.
set_element(0.0, ii, j) ;
1792 long_transition_content_end_position.
set_element(0, ii, j) ;
1800 long_transition_content_scores.
set_element(score, ii, j) ;
1801 long_transition_content_scores_loss.
set_element(new_loss, ii, j) ;
1802 #ifdef DYNPROG_DEBUG
1803 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1807 if (-long_transition_content_scores.
get_element(ii, j) > mval_trans )
1810 long_transition_content_scores.
set_element(-mval_trans, ii, j) ;
1811 long_transition_content_start_position.
set_element(start_5p_part, ii, j) ;
1813 long_transition_content_scores_loss.
set_element(segment_loss_part1, ii, j) ;
1814 #ifdef DYNPROG_DEBUG
1815 long_transition_content_scores_pen.
set_element(pen_val*0.5, ii, j) ;
1816 long_transition_content_scores_elem.
set_element(elem_val[i], ii, j) ;
1817 long_transition_content_scores_prev.
set_element(delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
1821 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1828 long_transition_content_start.
set_element(start_5p_part, ii, j) ;
1838 while (ts>0 &&
m_pos[t]-
m_pos[ts-1] <= m_long_transition_threshold)
1847 float pen_val_3p = 0.0 ;
1850 int32_t frame = orf_from ;
1858 #ifdef DYNPROG_DEBUG
1868 #ifdef DYNPROG_DEBUG
1875 mval -= (segment_loss_total-long_transition_content_scores_loss.
get_element(ii, j)) ;
1878 #ifdef DYNPROG_DEBUG
1881 SG_PRINT(
"Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
1883 long_transition_content_scores.
get_element(ii, j),
1884 long_transition_content_scores_pen.
get_element(ii, j),
1885 long_transition_content_scores_prev.
get_element(ii, j),
1886 long_transition_content_scores_elem.
get_element(ii, j),
1887 long_transition_content_scores_loss.
get_element(ii, j),
1890 m_pos[long_transition_content_start.
get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.
get_element(ii,j), t) ;
1891 SG_PRINT(
"fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] )
1894 if (fabs(segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total)>1e-3)
1896 SG_ERROR(
"LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
1898 long_transition_content_scores_loss.
get_element(ii, j), segment_loss_part1,
m_pos[long_transition_content_start_position.
get_element(ii,j)], m_pos[long_transition_content_end_position.
get_element(ii,j)],
1899 segment_loss_part2, m_pos[long_transition_content_end_position.
get_element(ii,j)], m_pos[t],
1900 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j),
1901 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total) ;
1911 if (mval < fixedtempvv_)
1914 int32_t fromtjk = fixedtempii_ ;
1920 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 ||
m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=
m_pos[long_transition_content_start_position.
get_element(ii, j)] || fixedtemplong)
1922 fixedtempvv_ = mval ;
1923 fixedtempii_ = ii + m_N*long_transition_content_start_position.
get_element(ii, j) ;
1924 fixed_list_len = 1 ;
1925 fixedtemplong = true ;
1930 #ifdef DYNPROG_TIMING
1932 long_transition_time += MyTime3.time_diff_sec() ;
1936 int32_t numEnt = fixed_list_len;
1941 for (int16_t k=0; k<nbest; k++)
1947 minusscore = fixedtempvv_ ;
1948 fromtjk = fixedtempii_ ;
1952 minusscore = fixedtempvv[k];
1953 fromtjk = fixedtempii[k];
1956 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.
element(j,t);
1959 ktable.
element(t,j,k) = (fromtjk%(m_N*nbest)-psi.
element(t,j,k))/m_N ;
1960 ptable.
element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
1975 int32_t list_len = 0 ;
1976 for (int16_t diff=0; diff<nbest; diff++)
1980 oldtempvv[list_len] = -(delta.
element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+
get_q(i)) ;
1981 oldtempii[list_len] = i + diff*
m_N ;
1988 for (int16_t k=0; k<nbest; k++)
1990 delta_end.
element(k) = -oldtempvv[k] ;
2000 for (int16_t k=0; k<nbest; k++)
2002 prob_nbest[k]= delta_end.
element(k) ;
2005 state_seq[i] = path_ends.
element(k) ;
2009 pos_seq[i] = m_seq_len-1 ;
2011 while (pos_seq[i]>0)
2015 state_seq[i+1] = psi.
element(pos_seq[i], state_seq[i], q);
2016 pos_seq[i+1] = ptable.
element(pos_seq[i], state_seq[i], q) ;
2018 q = ktable.
element(pos_seq[i], state_seq[i], q) ;
2022 int32_t num_states = i+1 ;
2023 for (i=0; i<num_states;i++)
2025 my_state_seq[i+k*
m_seq_len] = state_seq[num_states-i-1] ;
2026 my_pos_seq[i+k*
m_seq_len] = pos_seq[num_states-i-1] ;
2028 if (num_states<m_seq_len)
2030 my_state_seq[num_states+k*
m_seq_len]=-1 ;
2040 #ifdef DYNPROG_TIMING
2044 SG_PRINT(
"Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec())
2047 SG_FREE(fixedtempvv);
2048 SG_FREE(fixedtempii);
2053 int32_t *my_state_seq, int32_t *my_pos_seq,
2054 int32_t my_seq_len,
const float64_t *seq_array, int32_t max_num_signals)
2078 bool use_svm = false ;
2090 for (int32_t i=0; i<
m_N; i++)
2091 for (int32_t j=0; j<
m_N; j++)
2101 for (int32_t i=0; i<
m_N; i++)
2102 for (int32_t j=0; j<max_num_signals; j++)
2115 for (int32_t i=0; i<
m_N; i++)
2119 for (int32_t j=0; j<
m_N; j++)
2125 for (int32_t i=0; i<my_seq_len; i++)
2144 svm_value_part1[s]=0 ;
2145 svm_value_part2[s]=0 ;
2153 ASSERT(my_state_seq[0]>=0)
2157 ASSERT(my_state_seq[my_seq_len-1]>=0)
2162 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
2165 SG_DEBUG(
"m_seq_len=%i\n", my_seq_len)
2166 for (int32_t i=0; i<my_seq_len-1; i++)
2168 if (my_state_seq[i+1]==-1)
2170 int32_t from_state = my_state_seq[i] ;
2171 int32_t to_state = my_state_seq[i+1] ;
2172 int32_t from_pos = my_pos_seq[i] ;
2173 int32_t to_pos = my_pos_seq[i+1] ;
2178 #ifdef DYNPROG_DEBUG
2186 SG_PRINT(
"loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3)
2189 SG_PRINT(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2193 SG_DEBUG(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2199 #ifdef DYNPROG_DEBUG
2200 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2207 bool is_long_transition = false ;
2211 is_long_transition = true ;
2213 is_long_transition =
false ;
2216 int32_t from_pos_thresh = from_pos ;
2217 int32_t to_pos_thresh = to_pos ;
2221 if (is_long_transition)
2226 ASSERT(from_pos_thresh<to_pos)
2233 #ifdef DYNPROG_DEBUG
2234 SG_PRINT(
"part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ",
m_pos[from_pos],
m_pos[from_pos_thresh],
m_pos[from_pos_thresh+1])
2236 SG_PRINT(
"%1.4f ", svm_value_part1[s])
2248 #ifdef DYNPROG_DEBUG
2249 SG_PRINT(
"part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ",
m_pos[to_pos],
m_pos[to_pos_thresh],
m_pos[to_pos_thresh+1])
2251 SG_PRINT(
"%1.4f ", svm_value_part2[s])
2263 int32_t num_current_svms=0;
2264 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
2265 SG_PRINT(
"penalties(%i, %i), frame:%i ", from_state, to_state, frame)
2266 ((
CPlifBase*) PEN.
element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
2271 #ifdef DYNPROG_DEBUG
2280 if (PEN.
element(to_state, from_state)!=NULL)
2283 if (is_long_transition)
2287 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
2293 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
2294 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state, nscore) ;
2296 my_scores[i] += nscore ;
2305 #ifdef DYNPROG_DEBUG
2308 if (is_long_transition)
2310 #ifdef DYNPROG_DEBUG
2313 for (
int kk=0; kk<i; kk++)
2314 sum_score += my_scores[i] ;
2316 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
2317 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state,
2319 PEN.
element(to_state, from_state)->lookup_penalty(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1)*0.5,
m_pos[from_pos],
m_pos[from_pos_thresh],
2320 PEN.
element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
2324 if (is_long_transition)
2326 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1, 0.5) ;
2327 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[to_pos]-
m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
2338 if (is_long_transition)
2346 for (int32_t k=0;k<num_intensities;k++)
2348 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2349 svm_value[j]=intensities[k];
2355 for (int32_t k=0;k<num_intensities;k++)
2357 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2358 svm_value[j]=intensities[k];
2363 SG_FREE(intensities);
2376 for (int32_t k=0;k<num_intensities;k++)
2378 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2379 svm_value[j]=intensities[k];
2384 SG_FREE(intensities);
2389 #ifdef DYNPROG_DEBUG
2390 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2394 for (int32_t k=0; k<max_num_signals; k++)
2396 if ((PEN_state_signals.
element(to_state,k)==NULL)&&(k==0))
2398 #ifdef DYNPROG_DEBUG
2399 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k))
2401 my_scores[i] += seq_input.
element(to_state, to_pos, k) ;
2406 if (PEN_state_signals.
element(to_state, k)!=NULL)
2409 my_scores[i] += nscore ;
2410 #ifdef DYNPROG_DEBUG
2413 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
2414 is_long_transition,
m_pos[from_pos], from_pos, from_state,
m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.
element(to_state, to_pos, k)) ;
2415 for (
int x=0; x<23; x++)
2417 for (
int i=-10; i<10; i++)
2432 #ifdef DYNPROG_DEBUG
2433 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k), nscore, k)
2435 ((
CPlifBase*) PEN_state_signals.
element(to_state,k))->penalty_add_derivative(seq_input.
element(to_state, to_pos, k), svm_value, 1) ;
2443 total_score += my_scores[i] ;
2444 total_loss += my_losses[i] ;
2452 SG_FREE(svm_value_part1);
2453 SG_FREE(svm_value_part2);
2459 int32_t num_intensities = 0;
2463 int32_t num = m_num_probes_cum[type-1];
2464 while (*p_tiling_pos<to_pos)
2466 if (*p_tiling_pos>=from_pos)
2468 intensities[num_intensities] = *p_tiling_data;
2472 if (num>=m_num_probes_cum[type])
2474 last_pos = *p_tiling_pos;
2477 ASSERT(last_pos<*p_tiling_pos)
2479 return num_intensities;
2484 #ifdef DYNPROG_TIMING_DETAIL
2494 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
2500 svm_values[i] = to_val-from_val ;
2506 int32_t intron_list_start = m_num_lin_feat_plifs_cum[
m_num_raw_data];
2509 for (int32_t i=intron_list_start; i<intron_list_end;i++)
2511 svm_values[i] = (
float64_t) (support[cnt]);
2522 svm_values[frame_plifs[1]] = 1e10;
2523 svm_values[frame_plifs[2]] = 1e10;
2524 int32_t global_frame = from_pos%3;
2525 int32_t row = ((global_frame+frame)%3)+4;
2528 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
2530 #ifdef DYNPROG_TIMING_DETAIL
2532 content_svm_values_time += MyTime.time_diff_sec() ;
virtual float64_t get_max_value() const =0
CDynamicArray< float64_t > m_end_state_distribution_q_deriv
void set_loglevel(EMessageType level)
CDynamicArray< float64_t > m_segment_loss
CPlifMatrix * m_plif_matrices
CDynamicArray< int32_t > m_positions
void best_path_trans_deriv(int32_t *my_state_seq, int32_t *my_pos_seq, int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
CDynamicArray< float64_t > m_dict_weights
static int is_finite(double f)
checks whether a float is finite
static int32_t cum_num_words_default[5]
CDynamicArray< int32_t > m_segment_ids
void set_dict_weights(SGMatrix< float64_t > dictionary_weights)
void set_my_state_seq(int32_t *my_state_seq)
static float64_t ceil(float64_t d)
void set_plif_matrices(CPlifMatrix *pm)
static const float64_t INFTY
infinity
void create_word_string()
int32_t m_N
number of states
static void nmin(float64_t *output, T *index, int32_t size, int32_t n)
void set_const(const T &const_element)
void set_observation_matrix(SGNDArray< float64_t > seq)
void set_gene_string(SGVector< char > genestr)
void set_orf_info(SGMatrix< int32_t > orf_info)
void get_path_scores(float64_t **my_scores, int32_t *seq_len)
CPlifBase ** get_plif_matrix()
CDynamicArray< int32_t > m_transition_matrix_a_id
transition matrix
float32_t get_segment_loss(int32_t from_pos, int32_t to_pos, int32_t segment_id)
static void translate_from_single_order(ST *obs, int32_t sequence_length, int32_t start, int32_t p_order, int32_t max_val)
CDynamicArray< int32_t > m_word_degree
int32_t * m_cum_num_words_array
SGVector< float64_t > get_scores()
float64_t get_q(T_STATES offset) const
int32_t * m_mod_words_array
CDynamicArray< float64_t > m_lin_feat
void compute_loss(int32_t *all_pos, int32_t len)
CPlifBase ** get_state_signals()
float64_t get_p(T_STATES offset) const
int32_t raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t *intensities, int32_t type)
int32_t m_num_intron_plifs
void best_path_set_segment_ids_mask(int32_t *segment_ids, float64_t *segment_mask, int32_t m)
void set_pos(SGVector< int32_t > pos)
virtual float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const =0
CDynamicArray< float64_t > m_initial_state_distribution_p_deriv
int32_t get_num_positions()
CDynProg(int32_t p_num_svms=8)
CSparseFeatures< float64_t > * m_seq_sparse1
CSparseFeatures< float64_t > * m_seq_sparse2
int32_t * m_num_probes_cum
float32_t get_segment_loss_extend(int32_t from_pos, int32_t to_pos, int32_t segment_id)
void precompute_content_values()
int32_t * m_num_lin_feat_plifs_cum
CDynamicArray< float64_t > m_initial_state_distribution_p
initial distribution of states
CDynamicArray< int32_t > m_mod_words
ST get_feature(int32_t num, int32_t index)
void init_mod_words_array(SGMatrix< int32_t > p_mod_words_array)
void set_a(SGMatrix< float64_t > a)
CDynamicArray< float64_t > m_transition_matrix_a_deriv
void set_intron_list(CIntronList *intron_list, int32_t num_plifs)
Class SGObject is the base class of all shogun objects.
bool set_element(T e, int32_t idx1, int32_t idx2=0, int32_t idx3=0)
void set_q_vector(SGVector< float64_t > q)
CDynamicArray< int32_t > m_num_unique_words
CSGObject * element(int32_t idx1, int32_t idx2=0, int32_t idx3=0)
CDynamicArray< float64_t > m_transition_matrix_a
static int32_t mod_words_default[32]
void resize_lin_feat(int32_t num_new_feat)
void set_segment_ids(CDynamicArray< int32_t > *segment_ids)
void precompute_stop_codons()
CDynamicArray< bool > m_genestr_stop
void lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t *svm_values, int32_t frame)
CDynamicArray< int32_t > m_orf_info
CDynamicArray< float64_t > m_segment_sum_weights
SGMatrix< int32_t > get_positions()
int32_t * m_string_words_array
virtual bool uses_svm_values() const =0
CDynamicArray< int32_t > m_my_pos_seq
SGMatrix< int32_t > get_states()
CDynamicArray< int32_t > m_states
static int32_t frame_plifs[3]
CDynamicArray< int32_t > m_cum_num_words
void best_path_set_segment_loss(SGMatrix< float64_t > segment_loss)
CDynamicArray< int32_t > m_string_words
CDynamicArray< int32_t > m_pos
void set_content_type_array(SGMatrix< float64_t > seg_path)
int32_t get_use_svm() const
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
void precompute_tiling_plifs(CPlif **PEN, const int32_t *tiling_plif_ids, const int32_t num_tiling_plifs)
int32_t m_long_transition_threshold
CDynamicArray< float64_t > m_end_state_distribution_q
distribution of end-states
bool resize_array(int32_t ndim1, int32_t ndim2=1, int32_t ndim3=1)
void set_a_trans_matrix(SGMatrix< float64_t > a_trans)
float64_t * m_raw_intensities
CDynamicArray< float64_t > m_my_losses
CDynamicArray< float64_t > m_segment_mask
CDynamicArray< char > m_genestr
void set_my_pos_seq(int32_t *my_pos_seq)
CDynamicArray< float64_t > m_scores
void set_array(T *p_array, int32_t p_num_elements, int32_t array_size)
void set_array_name(const char *p_name)
all of classes and functions are contained in the shogun namespace
CDynamicArray< float64_t > m_observation_matrix
CDynamicArray< bool > m_sign_words
void get_intron_support(int32_t *values, int32_t from_pos, int32_t to_pos)
CDynamicArray< int32_t > m_num_words
CDynamicArray< float64_t > m_my_scores
void set_p_vector(SGVector< float64_t > p)
float64_t lookup_penalty(float64_t p_value, float64_t *svm_values) const
void get_path_losses(float64_t **my_losses, int32_t *seq_len)
CIntronList * m_intron_list
const T & element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
void init_tiling_data(int32_t *probe_pos, float64_t *intensities, const int32_t num_probes)
virtual void penalty_clear_derivative()=0
static bool sign_words_default[16]
static int32_t word_degree_default[4]
void set_num_states(int32_t N)
CSegmentLoss * m_seg_loss_obj
void set_array_name(const char *p_name)
bool extend_orf(int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos, int32_t to)
static int32_t string_words_default[16]
static int32_t num_words_default[4]
CDynamicArray< int32_t > m_my_state_seq
void set_segment_mask(CDynamicArray< float64_t > *segment_mask)
void init_content_svm_value_array(const int32_t p_num_svms)
const T & get_element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
void compute_nbest_paths(int32_t max_num_signals, bool use_orf, int16_t nbest, bool with_loss, bool with_multiple_sequences)
store plif arrays for all transitions in the model
void set_sparse_features(CSparseFeatures< float64_t > *seq_sparse1, CSparseFeatures< float64_t > *seq_sparse2)
void set_a_id(SGMatrix< int32_t > a)