27 using namespace shogun;
41 false,
false,
false,
false,
false,
false,
false,
false} ;
46 :
CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
47 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
48 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
49 m_end_state_distribution_q_deriv(1),
54 m_word_degree(word_degree_default, m_num_degrees, true, true),
55 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
56 m_cum_num_words_array(m_cum_num_words.get_array()),
57 m_num_words(num_words_default, m_num_degrees, true, true),
58 m_num_words_array(m_num_words.get_array()),
59 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
60 m_mod_words_array(m_mod_words.get_array()),
61 m_sign_words(sign_words_default, m_num_svms, true, true),
62 m_sign_words_array(m_sign_words.get_array()),
63 m_string_words(string_words_default, m_num_svms, true, true),
64 m_string_words_array(m_string_words.get_array()),
66 m_num_unique_words(m_num_degrees),
67 m_svm_arrays_clean(true),
69 m_max_a_id(0), m_observation_matrix(1,1,1),
74 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
87 m_plif_matrices(NULL),
91 m_num_intron_plifs(0),
93 m_raw_intensities(NULL),
95 m_num_probes_cum(NULL),
96 m_num_lin_feat_plifs_cum(NULL),
99 m_long_transitions(true),
100 m_long_transition_threshold(1000)
102 trans_list_forward = NULL ;
103 trans_list_forward_cnt = NULL ;
104 trans_list_forward_val = NULL ;
105 trans_list_forward_id = NULL ;
108 mem_initialized = true ;
120 #ifdef ARRAY_STATISTICS
143 if (trans_list_forward_cnt)
144 SG_FREE(trans_list_forward_cnt);
145 if (trans_list_forward)
147 for (int32_t i=0; i<trans_list_len; i++)
149 if (trans_list_forward[i])
150 SG_FREE(trans_list_forward[i]);
152 SG_FREE(trans_list_forward);
154 if (trans_list_forward_val)
156 for (int32_t i=0; i<trans_list_len; i++)
158 if (trans_list_forward_val[i])
159 SG_FREE(trans_list_forward_val[i]);
161 SG_FREE(trans_list_forward_val);
163 if (trans_list_forward_id)
165 for (int32_t i=0; i<trans_list_len; i++)
167 if (trans_list_forward_id[i])
168 SG_FREE(trans_list_forward_id[i]);
170 SG_FREE(trans_list_forward_id);
203 for (int32_t i=0; i<length-2; i++)
239 int32_t* probe_pos,
float64_t* intensities,
const int32_t num_probes)
248 if (m_num_raw_data==1){
249 memcpy(tmp_probe_pos, probe_pos, num_probes*
sizeof(int32_t));
250 memcpy(tmp_raw_intensities, intensities, num_probes*
sizeof(
float64_t));
255 memcpy(tmp_probe_pos+
m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*
sizeof(int32_t));
273 for (
int s=0; s<p_num_svms; s++)
289 memset(tmp, 0, (dim1+num_new_feat)*dim2*
sizeof(
float64_t)) ;
292 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
312 CPlif** PEN,
const int32_t* tiling_plif_ids,
const int32_t num_tiling_plifs)
319 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
320 for (int32_t i=0; i<num_tiling_plifs; i++)
323 CPlif * plif = PEN[tiling_plif_ids[i]];
335 for (int32_t pos_idx=0;pos_idx<
m_seq_len;pos_idx++)
339 for (int32_t i=0; i<num_tiling_plifs; i++)
342 CPlif * plif = PEN[tiling_plif_ids[i]];
352 for (int32_t i=0; i<num_tiling_plifs; i++)
356 SG_FREE(tiling_plif);
357 SG_FREE(tiling_rows);
372 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
373 for (int32_t i=0; i<genestr_len; i++)
398 int32_t from_pos =
m_pos[p];
399 int32_t to_pos =
m_pos[p+1];
407 my_svm_values_unnormalized[s]=0.0;
409 for (int32_t i=from_pos; i<to_pos; i++)
427 if (prev<-1e20 || prev>1e20)
429 SG_ERROR(
"initialization missing (%i, %i, %f)\n", s, p, prev)
434 SG_FREE(my_svm_values_unnormalized);
444 SG_ERROR(
"length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.
vlen,
m_N)
452 SG_ERROR(
"length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.
vlen,
m_N)
470 for (int32_t i=0; i<
m_N; i++)
472 for (int32_t j=0; j<
m_N; j++)
484 if (!((num_cols==3) || (num_cols==4)))
485 SG_ERROR(
"!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols)
487 SG_FREE(trans_list_forward);
488 SG_FREE(trans_list_forward_cnt);
489 SG_FREE(trans_list_forward_val);
490 SG_FREE(trans_list_forward_id);
492 trans_list_forward = NULL ;
493 trans_list_forward_cnt = NULL ;
494 trans_list_forward_val = NULL ;
500 mem_initialized = true ;
502 trans_list_forward_cnt=NULL ;
503 trans_list_len =
m_N ;
507 trans_list_forward_id = SG_MALLOC(int32_t*,
m_N);
510 for (int32_t j=0; j<
m_N; j++)
512 int32_t old_start_idx=start_idx;
514 while (start_idx<num_trans && a_trans.
matrix[start_idx+num_trans]==j)
518 if (start_idx>1 && start_idx<num_trans)
522 if (start_idx>1 && start_idx<num_trans)
525 int32_t len=start_idx-old_start_idx;
528 trans_list_forward_cnt[j] = 0 ;
532 trans_list_forward[j] = SG_MALLOC(
T_STATES, len);
533 trans_list_forward_val[j] = SG_MALLOC(
float64_t, len);
534 trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
538 trans_list_forward[j] = NULL;
539 trans_list_forward_val[j] = NULL;
540 trans_list_forward_id[j] = NULL;
544 for (int32_t i=0; i<num_trans; i++)
546 int32_t from_state = (int32_t)a_trans.
matrix[i] ;
547 int32_t to_state = (int32_t)a_trans.
matrix[i+num_trans] ;
551 id = (int32_t)a_trans.
matrix[i+num_trans*3] ;
554 ASSERT(to_state>=0 && to_state<m_N)
555 ASSERT(from_state>=0 && from_state<m_N)
557 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
558 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
559 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
560 trans_list_forward_cnt[to_state]++ ;
567 for (int32_t i=0; i<
m_N; i++)
568 for (int32_t j=0; j<
m_N; j++)
633 SG_WARNING(
"SVM array: word_degree.get_dim1()!=m_num_degrees")
635 SG_WARNING(
"SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1")
// Message names the violated expectation using "!=", matching the sibling
// SVM-array warnings around it (the previous text said "==", which read as
// if the sizes agreed).
637 SG_WARNING(
"SVM array: m_num_words.get_dim1()!=m_num_degrees")
641 SG_WARNING(
"SVM array: m_num_unique_words.get_dim1()!=m_num_degrees")
643 SG_WARNING(
"SVM array: m_mod_words.get_dim1()!=num_svms")
645 SG_WARNING(
"SVM array: m_mod_words.get_dim2()!=2")
647 SG_WARNING(
"SVM array: m_sign_words.get_dim1()!=num_svms")
649 SG_WARNING(
"SVM array: m_string_words.get_dim1()!=num_svms")
659 SG_ERROR(
"Expected 3-dimensional Matrix\n")
661 int32_t N=seq.
dims[0];
662 int32_t cand_pos=seq.
dims[1];
663 int32_t max_num_features=seq.
dims[2];
688 if (seg_path.
matrix!=NULL)
690 int32_t *segment_ids = SG_MALLOC(int32_t,
m_seq_len);
694 segment_ids[i] = (int32_t)seg_path.
matrix[2*i] ;
695 segment_mask[i] = seg_path.
matrix[2*i+1] ;
698 SG_FREE(segment_ids);
699 SG_FREE(segment_mask);
703 int32_t *izeros = SG_MALLOC(int32_t,
m_seq_len);
733 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
734 SG_ERROR(
"Sparse features must either both be NULL or both NON-NULL\n")
782 SG_ERROR(
"m_dict_weights array does not match num_svms=%i!=%i\n",
803 SG_ERROR(
"segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n)
815 int32_t* segment_ids,
float64_t* segment_mask, int32_t m)
821 for (int32_t i=1;i<m;i++)
868 int32_t sz =
sizeof(
float64_t)*(*seq_len);
870 *scores = SG_MALLOC(
float64_t, *seq_len);
882 int32_t sz =
sizeof(
float64_t)*(*seq_len);
884 *losses = SG_MALLOC(
float64_t, *seq_len);
893 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
896 #ifdef DYNPROG_TIMING_DETAIL
905 int32_t orf_target = orf_to-orf_from ;
906 if (orf_target<0) orf_target+=3 ;
916 #ifdef DYNPROG_TIMING_DETAIL
918 orf_time += MyTime.time_diff_sec() ;
923 for (; pos>=start; pos-=3)
926 #ifdef DYNPROG_TIMING_DETAIL
928 orf_time += MyTime.time_diff_sec() ;
936 #ifdef DYNPROG_TIMING_DETAIL
938 orf_time += MyTime.time_diff_sec() ;
944 int16_t nbest,
bool with_loss,
bool with_multiple_sequences)
959 for (int32_t i=0; i<nbest; i++)
977 #ifdef DYNPROG_TIMING
978 segment_init_time = 0.0 ;
979 segment_pos_time = 0.0 ;
980 segment_extend_time = 0.0 ;
981 segment_clean_time = 0.0 ;
983 svm_init_time = 0.0 ;
985 svm_clean_time = 0.0 ;
986 inner_loop_time = 0.0 ;
987 content_svm_values_time = 0.0 ;
988 content_plifs_time = 0.0 ;
989 inner_loop_max_time = 0.0 ;
990 long_transition_time = 0.0 ;
1001 #ifdef DYNPROG_DEBUG
1010 int32_t max_look_back = 1000 ;
1011 bool use_svm = false ;
1028 #ifdef DYNPROG_DEBUG
1032 SG_PRINT(
"m_num_lin_feat_plifs_cum: ")
1048 if (seq_array!=NULL)
1060 SG_PRINT(
"using sparse seq_array\n")
1064 ASSERT(max_num_signals==2)
1067 for (int32_t i=0; i<
m_N; i++)
1071 for (int32_t i=0; i<
m_N; i++)
1073 for (int32_t k=0; k<max_num_signals; k++)
1075 if ((PEN_state_signals.
element(i,k)==NULL) && (k==0))
1078 if (seq_input!=NULL)
1089 if (PEN_state_signals.
element(i,k)!=NULL)
1091 if (seq_input!=NULL)
1132 long_transition_content_start_position.
set_array_name(
"long_transition_content_start_position");
1133 #ifdef DYNPROG_DEBUG
1135 long_transition_content_end_position.
set_array_name(
"long_transition_content_end_position");
1138 long_transition_content_start.
set_array_name(
"long_transition_content_start");
1141 long_transition_content_scores.
set_array_name(
"long_transition_content_scores");
1142 #ifdef DYNPROG_DEBUG
1145 long_transition_content_scores_pen.
set_array_name(
"long_transition_content_scores_pen");
1148 long_transition_content_scores_prev.
set_array_name(
"long_transition_content_scores_prev");
1151 long_transition_content_scores_elem.
set_array_name(
"long_transition_content_scores_elem");
1154 long_transition_content_scores_loss.
set_array_name(
"long_transition_content_scores_loss");
1158 SG_ERROR(
"Long transitions are not supported for nbest!=1")
1159 long_transitions = false ;
1162 #ifdef DYNPROG_DEBUG
1163 long_transition_content_scores_pen.
set_const(0) ;
1164 long_transition_content_scores_elem.
set_const(0) ;
1165 long_transition_content_scores_prev.
set_const(0) ;
1168 long_transition_content_scores_loss.
set_const(0) ;
1169 long_transition_content_start.
set_const(0) ;
1170 long_transition_content_start_position.
set_const(0) ;
1171 #ifdef DYNPROG_DEBUG
1172 long_transition_content_end_position.
set_const(0) ;
1188 for (int32_t i=0; i<
m_N; i++)
1189 for (int32_t j=0; j<
m_N; j++)
1195 for (int32_t j=0; j<
m_N; j++)
1198 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1199 const T_STATES *elem_list = trans_list_forward[j] ;
1201 for (int32_t i=0; i<num_elem; i++)
1208 if (long_transitions)
1238 if (long_transitions)
1244 int32_t num_long_transitions = 0 ;
1245 for (int32_t i=0; i<
m_N; i++)
1246 for (int32_t j=0; j<
m_N; j++)
1249 num_long_transitions++ ;
1252 if (long_transitions)
1264 SG_DEBUG(
"Using %i long transitions\n", num_long_transitions)
1271 SG_DEBUG(
"maxlook: %d m_N: %d nbest: %d \n", max_look_back,
m_N, nbest)
1272 const int32_t look_back_buflen = (max_look_back*
m_N+1)*nbest ;
1273 SG_DEBUG(
"look_back_buflen=%i\n", look_back_buflen)
1320 memset(fixedtempvv, 0, look_back_buflen*
sizeof(
float64_t)) ;
1321 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
1322 memset(fixedtempii, 0, look_back_buflen*
sizeof(int32_t)) ;
1368 #ifdef USE_TMP_ARRAYCLASS
1369 fixedtempvv.set_array_name(
"fixedtempvv") ;
// Give fixedtempii its own name; it was previously labelled "fixedtempvv",
// the same name as the array named on the line above.
1370 fixedtempii.set_array_name(
"fixedtempii") ;
1381 #ifdef DYNPROG_DEBUG
1394 PEN.display_size() ;
1395 PEN_state_signals.display_size() ;
1408 #ifdef USE_TMP_ARRAYCLASS
1409 fixedtempvv.display_size() ;
1410 fixedtempii.display_size() ;
1421 #endif //DYNPROG_DEBUG
1446 for (int16_t k=1; k<nbest; k++)
1448 int32_t dim1, dim2, dim3 ;
1492 for (int16_t k=0; k<nbest; k++)
1494 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = seq.
element(j,t) ;
1503 const T_STATES num_elem = trans_list_forward_cnt[j] ;
1504 const T_STATES *elem_list = trans_list_forward[j] ;
1505 const float64_t *elem_val = trans_list_forward_val[j] ;
1506 const int32_t *elem_id = trans_list_forward_id[j] ;
1508 int32_t fixed_list_len = 0 ;
1510 int32_t fixedtempii_ = 0 ;
1511 bool fixedtemplong = false ;
1513 for (int32_t i=0; i<num_elem; i++)
1530 int32_t look_back_ = look_back.
element(j, ii) ;
1534 if((orf_from!=-1)!=(orf_to!=-1))
1535 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1536 ASSERT((orf_from!=-1)==(orf_to!=-1))
1538 int32_t orf_target = -1 ;
1541 orf_target=orf_to-orf_from ;
1544 ASSERT(orf_target>=0 && orf_target<3)
1547 int32_t orf_last_pos =
m_pos[t] ;
1548 #ifdef DYNPROG_TIMING
1551 int32_t num_ok_pos = 0 ;
1553 for (int32_t ts=t-1; ts>=0 &&
m_pos[t]-
m_pos[ts]<=look_back_; ts--)
1567 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
1568 ok=(!use_orf) ||
extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
1586 int32_t frame = orf_from;
1592 #ifdef DYNPROG_TIMING_DETAIL
1595 pen_val = penalty->
lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
1597 #ifdef DYNPROG_TIMING_DETAIL
1599 content_plifs_time += MyTime.time_diff_sec() ;
1603 #ifdef DYNPROG_TIMING_DETAIL
1612 val += segment_loss ;
1614 float64_t mval = -(val + delta.
element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
1616 if (mval<fixedtempvv_)
1618 fixedtempvv_ = mval ;
1619 fixedtempii_ = ii + ts*
m_N;
1620 fixed_list_len = 1 ;
1621 fixedtemplong = false ;
1626 for (int16_t diff=0; diff<nbest; diff++)
1631 val += segment_loss ;
1633 float64_t mval = -(val + delta.
element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
1639 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
1641 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
1643 fixedtempvv[fixed_list_len] = mval ;
1644 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
1649 int32_t addhere = fixed_list_len;
1650 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
1654 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
1656 fixedtempvv[jj] = fixedtempvv[jj-1];
1657 fixedtempii[jj] = fixedtempii[jj-1];
1660 fixedtempvv[addhere] = mval;
1661 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
1663 if (fixed_list_len < nbest)
1669 #ifdef DYNPROG_TIMING_DETAIL
1671 inner_loop_max_time += MyTime.time_diff_sec() ;
1675 #ifdef DYNPROG_TIMING
1677 inner_loop_time += MyTime3.time_diff_sec() ;
1680 for (int32_t i=0; i<num_elem; i++)
1697 int32_t look_back_ = look_back.
element(j, ii) ;
1702 if((orf_from!=-1)!=(orf_to!=-1))
1703 SG_DEBUG(
"j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i])
1704 ASSERT((orf_from!=-1)==(orf_to!=-1))
1706 int32_t orf_target = -1 ;
1709 orf_target=orf_to-orf_from ;
1712 ASSERT(orf_target>=0 && orf_target<3)
1718 #ifdef DYNPROG_TIMING
1729 #ifdef DYNPROG_TIMING
1738 int32_t start = long_transition_content_start.
get_element(ii, j) ;
1739 int32_t end_5p_part = start ;
1743 while (end_5p_part<=t &&
m_pos[end_5p_part+1]-
m_pos[start_5p_part]<=m_long_transition_threshold)
1746 ASSERT(
m_pos[end_5p_part+1]-
m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t)
1747 ASSERT(
m_pos[end_5p_part]-
m_pos[start_5p_part] <= m_long_transition_threshold)
1764 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
1773 mval_trans -= segment_loss_part1 ;
1784 long_transition_content_start_position.
set_element(0, ii, j) ;
1786 long_transition_content_scores_loss.
set_element(0.0, ii, j) ;
1787 #ifdef DYNPROG_DEBUG
1788 long_transition_content_scores_pen.
set_element(0.0, ii, j) ;
1789 long_transition_content_scores_elem.
set_element(0.0, ii, j) ;
1790 long_transition_content_scores_prev.
set_element(0.0, ii, j) ;
1791 long_transition_content_end_position.
set_element(0, ii, j) ;
1799 long_transition_content_scores.
set_element(score, ii, j) ;
1800 long_transition_content_scores_loss.
set_element(new_loss, ii, j) ;
1801 #ifdef DYNPROG_DEBUG
1802 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1806 if (-long_transition_content_scores.
get_element(ii, j) > mval_trans )
1809 long_transition_content_scores.
set_element(-mval_trans, ii, j) ;
1810 long_transition_content_start_position.
set_element(start_5p_part, ii, j) ;
1812 long_transition_content_scores_loss.
set_element(segment_loss_part1, ii, j) ;
1813 #ifdef DYNPROG_DEBUG
1814 long_transition_content_scores_pen.
set_element(pen_val*0.5, ii, j) ;
1815 long_transition_content_scores_elem.
set_element(elem_val[i], ii, j) ;
1816 long_transition_content_scores_prev.
set_element(delta.
element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
1820 long_transition_content_end_position.
set_element(end_5p_part, ii, j) ;
1827 long_transition_content_start.
set_element(start_5p_part, ii, j) ;
1837 while (ts>0 &&
m_pos[t]-
m_pos[ts-1] <= m_long_transition_threshold)
1846 float pen_val_3p = 0.0 ;
1849 int32_t frame = orf_from ;
1857 #ifdef DYNPROG_DEBUG
1867 #ifdef DYNPROG_DEBUG
1874 mval -= (segment_loss_total-long_transition_content_scores_loss.
get_element(ii, j)) ;
1877 #ifdef DYNPROG_DEBUG
1880 SG_PRINT(
"Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i) scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
1882 long_transition_content_scores.
get_element(ii, j),
1883 long_transition_content_scores_pen.
get_element(ii, j),
1884 long_transition_content_scores_prev.
get_element(ii, j),
1885 long_transition_content_scores_elem.
get_element(ii, j),
1886 long_transition_content_scores_loss.
get_element(ii, j),
1889 m_pos[long_transition_content_start.
get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.
get_element(ii,j), t) ;
1890 SG_PRINT(
"fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] )
1893 if (fabs(segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total)>1e-3)
1895 SG_ERROR(
"LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
1897 long_transition_content_scores_loss.
get_element(ii, j), segment_loss_part1,
m_pos[long_transition_content_start_position.
get_element(ii,j)], m_pos[long_transition_content_end_position.
get_element(ii,j)],
1898 segment_loss_part2, m_pos[long_transition_content_end_position.
get_element(ii,j)], m_pos[t],
1899 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j),
1900 segment_loss_part2+long_transition_content_scores_loss.
get_element(ii, j) - segment_loss_total) ;
1910 if (mval < fixedtempvv_)
1913 int32_t fromtjk = fixedtempii_ ;
1919 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 ||
m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=
m_pos[long_transition_content_start_position.
get_element(ii, j)] || fixedtemplong)
1921 fixedtempvv_ = mval ;
1922 fixedtempii_ = ii + m_N*long_transition_content_start_position.
get_element(ii, j) ;
1923 fixed_list_len = 1 ;
1924 fixedtemplong = true ;
1929 #ifdef DYNPROG_TIMING
1931 long_transition_time += MyTime3.time_diff_sec() ;
1935 int32_t numEnt = fixed_list_len;
1940 for (int16_t k=0; k<nbest; k++)
1946 minusscore = fixedtempvv_ ;
1947 fromtjk = fixedtempii_ ;
1951 minusscore = fixedtempvv[k];
1952 fromtjk = fixedtempii[k];
1955 delta.
element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.
element(j,t);
1958 ktable.
element(t,j,k) = (fromtjk%(m_N*nbest)-psi.
element(t,j,k))/m_N ;
1959 ptable.
element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
1974 int32_t list_len = 0 ;
1975 for (int16_t diff=0; diff<nbest; diff++)
1979 oldtempvv[list_len] = -(delta.
element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+
get_q(i)) ;
1980 oldtempii[list_len] = i + diff*
m_N ;
1987 for (int16_t k=0; k<nbest; k++)
1989 delta_end.
element(k) = -oldtempvv[k] ;
1999 for (int16_t k=0; k<nbest; k++)
2001 prob_nbest[k]= delta_end.
element(k) ;
2004 state_seq[i] = path_ends.
element(k) ;
2008 pos_seq[i] = m_seq_len-1 ;
2010 while (pos_seq[i]>0)
2014 state_seq[i+1] = psi.
element(pos_seq[i], state_seq[i], q);
2015 pos_seq[i+1] = ptable.
element(pos_seq[i], state_seq[i], q) ;
2017 q = ktable.
element(pos_seq[i], state_seq[i], q) ;
2021 int32_t num_states = i+1 ;
2022 for (i=0; i<num_states;i++)
2024 my_state_seq[i+k*
m_seq_len] = state_seq[num_states-i-1] ;
2025 my_pos_seq[i+k*
m_seq_len] = pos_seq[num_states-i-1] ;
2027 if (num_states<m_seq_len)
2029 my_state_seq[num_states+k*
m_seq_len]=-1 ;
2039 #ifdef DYNPROG_TIMING
2043 SG_PRINT(
"Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec())
2046 SG_FREE(fixedtempvv);
2047 SG_FREE(fixedtempii);
2052 int32_t *my_state_seq, int32_t *my_pos_seq,
2053 int32_t my_seq_len,
const float64_t *seq_array, int32_t max_num_signals)
2077 bool use_svm = false ;
2089 for (int32_t i=0; i<
m_N; i++)
2090 for (int32_t j=0; j<
m_N; j++)
2100 for (int32_t i=0; i<
m_N; i++)
2101 for (int32_t j=0; j<max_num_signals; j++)
2114 for (int32_t i=0; i<
m_N; i++)
2118 for (int32_t j=0; j<
m_N; j++)
2124 for (int32_t i=0; i<my_seq_len; i++)
2143 svm_value_part1[s]=0 ;
2144 svm_value_part2[s]=0 ;
2152 ASSERT(my_state_seq[0]>=0)
2156 ASSERT(my_state_seq[my_seq_len-1]>=0)
2161 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
2164 SG_DEBUG(
"m_seq_len=%i\n", my_seq_len)
2165 for (int32_t i=0; i<my_seq_len-1; i++)
2167 if (my_state_seq[i+1]==-1)
2169 int32_t from_state = my_state_seq[i] ;
2170 int32_t to_state = my_state_seq[i+1] ;
2171 int32_t from_pos = my_pos_seq[i] ;
2172 int32_t to_pos = my_pos_seq[i+1] ;
2177 #ifdef DYNPROG_DEBUG
2185 SG_PRINT(
"loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3)
2188 SG_PRINT(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2192 SG_DEBUG(
"%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state)
2198 #ifdef DYNPROG_DEBUG
2199 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2206 bool is_long_transition = false ;
2210 is_long_transition = true ;
2212 is_long_transition =
false ;
2215 int32_t from_pos_thresh = from_pos ;
2216 int32_t to_pos_thresh = to_pos ;
2220 if (is_long_transition)
2225 ASSERT(from_pos_thresh<to_pos)
2232 #ifdef DYNPROG_DEBUG
2233 SG_PRINT(
"part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ",
m_pos[from_pos],
m_pos[from_pos_thresh],
m_pos[from_pos_thresh+1])
2235 SG_PRINT(
"%1.4f ", svm_value_part1[s])
2247 #ifdef DYNPROG_DEBUG
2248 SG_PRINT(
"part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ",
m_pos[to_pos],
m_pos[to_pos_thresh],
m_pos[to_pos_thresh+1])
2250 SG_PRINT(
"%1.4f ", svm_value_part2[s])
2262 int32_t num_current_svms=0;
2263 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
2264 SG_PRINT(
"penalties(%i, %i), frame:%i ", from_state, to_state, frame)
2265 ((
CPlifBase*) PEN.
element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
2270 #ifdef DYNPROG_DEBUG
2279 if (PEN.
element(to_state, from_state)!=NULL)
2282 if (is_long_transition)
2286 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
2292 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
2293 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state, nscore) ;
2295 my_scores[i] += nscore ;
2304 #ifdef DYNPROG_DEBUG
2307 if (is_long_transition)
2309 #ifdef DYNPROG_DEBUG
// Accumulate the scores of entries 0..i-1. Index with the loop variable kk:
// the previous form added my_scores[i] on every iteration, producing
// i * my_scores[i] rather than a running total.
2312 for (
int kk=0; kk<i; kk++)
2313 sum_score += my_scores[kk] ;
2315 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
2316 is_long_transition,
m_pos[from_pos], from_state,
m_pos[to_pos], to_state,
2318 PEN.
element(to_state, from_state)->lookup_penalty(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1)*0.5,
m_pos[from_pos],
m_pos[from_pos_thresh],
2319 PEN.
element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
2323 if (is_long_transition)
2325 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[from_pos_thresh]-
m_pos[from_pos], svm_value_part1, 0.5) ;
2326 ((
CPlifBase*) PEN.
element(to_state, from_state))->penalty_add_derivative(
m_pos[to_pos]-
m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
2337 if (is_long_transition)
2345 for (int32_t k=0;k<num_intensities;k++)
2347 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2348 svm_value[j]=intensities[k];
2354 for (int32_t k=0;k<num_intensities;k++)
2356 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2357 svm_value[j]=intensities[k];
2362 SG_FREE(intensities);
2375 for (int32_t k=0;k<num_intensities;k++)
2377 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
2378 svm_value[j]=intensities[k];
2383 SG_FREE(intensities);
2388 #ifdef DYNPROG_DEBUG
2389 SG_DEBUG(
"%i. scores[i]=%f\n", i, my_scores[i])
2393 for (int32_t k=0; k<max_num_signals; k++)
2395 if ((PEN_state_signals.
element(to_state,k)==NULL)&&(k==0))
2397 #ifdef DYNPROG_DEBUG
2398 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k))
2400 my_scores[i] += seq_input.
element(to_state, to_pos, k) ;
2405 if (PEN_state_signals.
element(to_state, k)!=NULL)
2408 my_scores[i] += nscore ;
2409 #ifdef DYNPROG_DEBUG
2412 SG_PRINT(
"is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
2413 is_long_transition,
m_pos[from_pos], from_pos, from_state,
m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.
element(to_state, to_pos, k)) ;
2414 for (
int x=0; x<23; x++)
2416 for (
int i=-10; i<10; i++)
2431 #ifdef DYNPROG_DEBUG
2432 SG_DEBUG(
"%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.
element(to_state, to_pos, k), nscore, k)
2434 ((
CPlifBase*) PEN_state_signals.
element(to_state,k))->penalty_add_derivative(seq_input.
element(to_state, to_pos, k), svm_value, 1) ;
2442 total_score += my_scores[i] ;
2443 total_loss += my_losses[i] ;
2451 SG_FREE(svm_value_part1);
2452 SG_FREE(svm_value_part2);
2458 int32_t num_intensities = 0;
2462 int32_t num = m_num_probes_cum[type-1];
2463 while (*p_tiling_pos<to_pos)
2465 if (*p_tiling_pos>=from_pos)
2467 intensities[num_intensities] = *p_tiling_data;
2471 if (num>=m_num_probes_cum[type])
2473 last_pos = *p_tiling_pos;
2476 ASSERT(last_pos<*p_tiling_pos)
2478 return num_intensities;
2483 #ifdef DYNPROG_TIMING_DETAIL
2493 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
2499 svm_values[i] = to_val-from_val ;
2505 int32_t intron_list_start = m_num_lin_feat_plifs_cum[
m_num_raw_data];
2508 for (int32_t i=intron_list_start; i<intron_list_end;i++)
2510 svm_values[i] = (
float64_t) (support[cnt]);
2521 svm_values[frame_plifs[1]] = 1e10;
2522 svm_values[frame_plifs[2]] = 1e10;
2523 int32_t global_frame = from_pos%3;
2524 int32_t row = ((global_frame+frame)%3)+4;
2527 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
2529 #ifdef DYNPROG_TIMING_DETAIL
2531 content_svm_values_time += MyTime.time_diff_sec() ;