00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/structure/DynProg.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/lib/config.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/Alphabet.h>
00019 #include <shogun/structure/Plif.h>
00020 #include <shogun/structure/IntronList.h>
00021
00022 #include <stdlib.h>
00023 #include <stdio.h>
00024 #include <time.h>
00025 #include <ctype.h>
00026
00027 using namespace shogun;
00028
00029
00030
00031
00032 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00033 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00034 int32_t CDynProg::frame_plifs[3]={4,5,6};
00035 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ;
00036 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00037 1,1,1,1,1,1,1,1,
00038 0,0,0,0,0,0,0,0,
00039 0,0,0,0,0,0,0,0} ;
00040 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00041 false,false,false,false,false,false,false,false} ;
00042 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00043 1,1,1,1,1,1,1,1} ;
00044
00045 CDynProg::CDynProg(int32_t num_svms )
00046 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00047 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00048 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00049 m_end_state_distribution_q_deriv(1),
00050
00051
00052 m_num_degrees(4),
00053 m_num_svms(num_svms),
00054 m_word_degree(word_degree_default, m_num_degrees, true, true),
00055 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00056 m_cum_num_words_array(m_cum_num_words.get_array()),
00057 m_num_words(num_words_default, m_num_degrees, true, true),
00058 m_num_words_array(m_num_words.get_array()),
00059 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00060 m_mod_words_array(m_mod_words.get_array()),
00061 m_sign_words(sign_words_default, m_num_svms, true, true),
00062 m_sign_words_array(m_sign_words.get_array()),
00063 m_string_words(string_words_default, m_num_svms, true, true),
00064 m_string_words_array(m_string_words.get_array()),
00065
00066 m_num_unique_words(m_num_degrees),
00067 m_svm_arrays_clean(true),
00068
00069 m_max_a_id(0), m_observation_matrix(1,1,1),
00070 m_pos(1),
00071 m_seq_len(0),
00072 m_orf_info(1,2),
00073 m_plif_list(1),
00074 m_PEN(1,1),
00075 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
00076 m_segment_ids(1),
00077 m_segment_mask(1),
00078 m_my_state_seq(1),
00079 m_my_pos_seq(1),
00080 m_my_scores(1),
00081 m_my_losses(1),
00082 m_scores(1),
00083 m_states(1,1),
00084 m_positions(1,1),
00085
00086 m_seq_sparse1(NULL),
00087 m_seq_sparse2(NULL),
00088 m_plif_matrices(NULL),
00089
00090 m_genestr_stop(1),
00091 m_intron_list(NULL),
00092 m_num_intron_plifs(0),
00093 m_lin_feat(1,1),
00094 m_raw_intensities(NULL),
00095 m_probe_pos(NULL),
00096 m_num_probes_cum(NULL),
00097 m_num_lin_feat_plifs_cum(NULL),
00098 m_num_raw_data(0),
00099
00100 m_long_transitions(true),
00101 m_long_transition_threshold(1000)
00102 {
00103 trans_list_forward = NULL ;
00104 trans_list_forward_cnt = NULL ;
00105 trans_list_forward_val = NULL ;
00106 trans_list_forward_id = NULL ;
00107 trans_list_len = 0 ;
00108
00109 mem_initialized = true ;
00110
00111 m_N=1;
00112
00113 m_raw_intensities = NULL;
00114 m_probe_pos = NULL;
00115 m_num_probes_cum = SG_MALLOC(int32_t, 100);
00116 m_num_probes_cum[0] = 0;
00117
00118 m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100);
00119 m_num_lin_feat_plifs_cum[0] = m_num_svms;
00120 m_num_raw_data = 0;
00121 #ifdef ARRAY_STATISTICS
00122 m_word_degree.set_array_name("word_degree");
00123 #endif
00124
00125 m_transition_matrix_a_id.set_array_name("transition_matrix_a_id");
00126 m_transition_matrix_a.set_array_name("transition_matrix_a");
00127 m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv");
00128 m_mod_words.set_array_name("mod_words");
00129 m_orf_info.set_array_name("orf_info");
00130 m_segment_sum_weights.set_array_name("segment_sum_weights");
00131 m_PEN.set_array_name("PEN");
00132 m_PEN_state_signals.set_array_name("PEN_state_signals");
00133 m_dict_weights.set_array_name("dict_weights");
00134 m_states.set_array_name("states");
00135 m_positions.set_array_name("positions");
00136 m_lin_feat.set_array_name("lin_feat");
00137
00138
00139 m_observation_matrix.set_array_name("m_observation_matrix");
00140 m_segment_loss.set_array_name("m_segment_loss");
00141 m_seg_loss_obj = new CSegmentLoss();
00142 }
00143
00144 CDynProg::~CDynProg()
00145 {
00146 if (trans_list_forward_cnt)
00147 SG_FREE(trans_list_forward_cnt);
00148 if (trans_list_forward)
00149 {
00150 for (int32_t i=0; i<trans_list_len; i++)
00151 {
00152 if (trans_list_forward[i])
00153 SG_FREE(trans_list_forward[i]);
00154 }
00155 SG_FREE(trans_list_forward);
00156 }
00157 if (trans_list_forward_val)
00158 {
00159 for (int32_t i=0; i<trans_list_len; i++)
00160 {
00161 if (trans_list_forward_val[i])
00162 SG_FREE(trans_list_forward_val[i]);
00163 }
00164 SG_FREE(trans_list_forward_val);
00165 }
00166 if (trans_list_forward_id)
00167 {
00168 for (int32_t i=0; i<trans_list_len; i++)
00169 {
00170 if (trans_list_forward_id[i])
00171 SG_FREE(trans_list_forward_id[i]);
00172 }
00173 SG_FREE(trans_list_forward_id);
00174 }
00175 if (m_raw_intensities)
00176 SG_FREE(m_raw_intensities);
00177 if (m_probe_pos)
00178 SG_FREE(m_probe_pos);
00179 if (m_num_probes_cum)
00180 SG_FREE(m_num_probes_cum);
00181 if (m_num_lin_feat_plifs_cum)
00182 SG_FREE(m_num_lin_feat_plifs_cum);
00183
00184 delete m_intron_list;
00185
00186 SG_UNREF(m_seq_sparse1);
00187 SG_UNREF(m_seq_sparse2);
00188 SG_UNREF(m_plif_matrices);
00189 }
00190
00192 int32_t CDynProg::get_num_svms()
00193 {
00194 return m_num_svms;
00195 }
00196
00197 void CDynProg::precompute_stop_codons()
00198 {
00199 int32_t length=m_genestr.get_dim1();
00200
00201 m_genestr_stop.resize_array(length) ;
00202 m_genestr_stop.set_const(0) ;
00203 m_genestr_stop.set_array_name("genestr_stop") ;
00204 {
00205 for (int32_t i=0; i<length-2; i++)
00206 if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
00207 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
00208 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00209 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00210 {
00211 m_genestr_stop.element(i)=true ;
00212 }
00213 else
00214 m_genestr_stop.element(i)=false ;
00215 m_genestr_stop.element(length-2)=false ;
00216 m_genestr_stop.element(length-1)=false ;
00217 }
00218 }
00219
00220 void CDynProg::set_num_states(int32_t p_N)
00221 {
00222 m_N=p_N ;
00223
00224 m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00225 m_transition_matrix_a.resize_array(m_N,m_N) ;
00226 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00227 m_initial_state_distribution_p.resize_array(m_N) ;
00228 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00229 m_end_state_distribution_q.resize_array(m_N);
00230 m_end_state_distribution_q_deriv.resize_array(m_N) ;
00231
00232 m_orf_info.resize_array(m_N,2) ;
00233 m_PEN.resize_array(m_N,m_N) ;
00234 }
00235
00236 int32_t CDynProg::get_num_states()
00237 {
00238 return m_N;
00239 }
00240
00241 void CDynProg::init_tiling_data(
00242 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00243 {
00244 m_num_raw_data++;
00245 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00246
00247 int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]);
00248 float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]);
00249
00250
00251 if (m_num_raw_data==1){
00252 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00253 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00254
00255 }else{
00256 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00257 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00258 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00259 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00260 }
00261 SG_FREE(m_probe_pos);
00262 SG_FREE(m_raw_intensities);
00263 m_probe_pos = tmp_probe_pos;
00264 m_raw_intensities = tmp_raw_intensities;
00265
00266
00267
00268
00269 }
00270
00271 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00272 {
00273 m_lin_feat.resize_array(p_num_svms, m_seq_len);
00274
00275
00276 for (int s=0; s<p_num_svms; s++)
00277 for (int p=0; p<m_seq_len; p++)
00278 m_lin_feat.set_element(0.0, s, p) ;
00279 }
00280
00281 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00282 {
00283 int32_t dim1, dim2;
00284 m_lin_feat.get_array_size(dim1, dim2);
00285 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00286 ASSERT(dim2==m_seq_len);
00287
00288
00289
00290 float64_t* arr = m_lin_feat.get_array();
00291 float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2);
00292 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00293 for(int32_t j=0;j<m_seq_len;j++)
00294 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00295 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00296
00297 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);
00298 SG_FREE(tmp);
00299
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312 }
00313
00314 void CDynProg::precompute_tiling_plifs(
00315 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00316 {
00317 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00318 float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs);
00319 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
00320 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00321 svm_value[i]=0.0;
00322 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
00323 for (int32_t i=0; i<num_tiling_plifs; i++)
00324 {
00325 tiling_plif[i]=0.0;
00326 CPlif * plif = PEN[tiling_plif_ids[i]];
00327 tiling_rows[i] = plif->get_use_svm();
00328
00329 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00330 }
00331 resize_lin_feat(num_tiling_plifs);
00332
00333
00334 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00335 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00336 int32_t num=m_num_probes_cum[m_num_raw_data-1];
00337
00338 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00339 {
00340 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00341 {
00342 for (int32_t i=0; i<num_tiling_plifs; i++)
00343 {
00344 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00345 CPlif * plif = PEN[tiling_plif_ids[i]];
00346 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00347 plif->set_do_calc(true);
00348 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00349 plif->set_do_calc(false);
00350 }
00351 p_tiling_data++;
00352 p_tiling_pos++;
00353 num++;
00354 }
00355 for (int32_t i=0; i<num_tiling_plifs; i++)
00356 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00357 }
00358 SG_FREE(svm_value);
00359 SG_FREE(tiling_plif);
00360 SG_FREE(tiling_rows);
00361 }
00362
00363 void CDynProg::create_word_string()
00364 {
00365 SG_FREE(m_wordstr);
00366 m_wordstr=SG_MALLOC(uint16_t**, 5440);
00367 int32_t k=0;
00368 int32_t genestr_len=m_genestr.get_dim1();
00369
00370 m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees);
00371 for (int32_t j=0; j<m_num_degrees; j++)
00372 {
00373 m_wordstr[k][j]=NULL ;
00374 {
00375 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
00376 for (int32_t i=0; i<genestr_len; i++)
00377 switch (m_genestr[i])
00378 {
00379 case 'A':
00380 case 'a': m_wordstr[k][j][i]=0 ; break ;
00381 case 'C':
00382 case 'c': m_wordstr[k][j][i]=1 ; break ;
00383 case 'G':
00384 case 'g': m_wordstr[k][j][i]=2 ; break ;
00385 case 'T':
00386 case 't': m_wordstr[k][j][i]=3 ; break ;
00387 default: ASSERT(0) ;
00388 }
00389 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00390 }
00391 }
00392 }
00393
00394 void CDynProg::precompute_content_values()
00395 {
00396 for (int32_t s=0; s<m_num_svms; s++)
00397 m_lin_feat.set_element(0.0, s, 0);
00398
00399 for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00400 {
00401 int32_t from_pos = m_pos[p];
00402 int32_t to_pos = m_pos[p+1];
00403 float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms);
00404
00405
00406 ASSERT(from_pos<=m_genestr.get_dim1());
00407 ASSERT(to_pos<=m_genestr.get_dim1());
00408
00409 for (int32_t s=0; s<m_num_svms; s++)
00410 my_svm_values_unnormalized[s]=0.0;
00411
00412 for (int32_t i=from_pos; i<to_pos; i++)
00413 {
00414 for (int32_t j=0; j<m_num_degrees; j++)
00415 {
00416 uint16_t word = m_wordstr[0][j][i] ;
00417 for (int32_t s=0; s<m_num_svms; s++)
00418 {
00419
00420 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00421 continue;
00422 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00423 }
00424 }
00425 }
00426 for (int32_t s=0; s<m_num_svms; s++)
00427 {
00428 float64_t prev = m_lin_feat.get_element(s, p);
00429
00430 if (prev<-1e20 || prev>1e20)
00431 {
00432 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00433 prev=0 ;
00434 }
00435 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00436 }
00437 SG_FREE(my_svm_values_unnormalized);
00438 }
00439
00440
00441
00442 }
00443
00444 void CDynProg::set_p_vector(SGVector<float64_t> p)
00445 {
00446 if (!(p.vlen==m_N))
00447 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N);
00448
00449 m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true);
00450 }
00451
00452 void CDynProg::set_q_vector(SGVector<float64_t> q)
00453 {
00454 if (!(q.vlen==m_N))
00455 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N);
00456 m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true);
00457 }
00458
00459 void CDynProg::set_a(SGMatrix<float64_t> a)
00460 {
00461 ASSERT(a.num_cols==m_N);
00462 ASSERT(a.num_rows==m_N);
00463 m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true);
00464 m_transition_matrix_a_deriv.resize_array(m_N, m_N);
00465 }
00466
00467 void CDynProg::set_a_id(SGMatrix<int32_t> a)
00468 {
00469 ASSERT(a.num_cols==m_N);
00470 ASSERT(a.num_rows==m_N);
00471 m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true);
00472 m_max_a_id = 0;
00473 for (int32_t i=0; i<m_N; i++)
00474 {
00475 for (int32_t j=0; j<m_N; j++)
00476 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00477 }
00478 }
00479
00480 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans)
00481 {
00482 int32_t num_trans=a_trans.num_rows;
00483 int32_t num_cols=a_trans.num_cols;
00484
00485
00486
00487 if (!((num_cols==3) || (num_cols==4)))
00488 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00489
00490 SG_FREE(trans_list_forward);
00491 SG_FREE(trans_list_forward_cnt);
00492 SG_FREE(trans_list_forward_val);
00493 SG_FREE(trans_list_forward_id);
00494
00495 trans_list_forward = NULL ;
00496 trans_list_forward_cnt = NULL ;
00497 trans_list_forward_val = NULL ;
00498 trans_list_len = 0 ;
00499
00500 m_transition_matrix_a.set_const(0) ;
00501 m_transition_matrix_a_id.set_const(0) ;
00502
00503 mem_initialized = true ;
00504
00505 trans_list_forward_cnt=NULL ;
00506 trans_list_len = m_N ;
00507 trans_list_forward = SG_MALLOC(T_STATES*, m_N);
00508 trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N);
00509 trans_list_forward_val = SG_MALLOC(float64_t*, m_N);
00510 trans_list_forward_id = SG_MALLOC(int32_t*, m_N);
00511
00512 int32_t start_idx=0;
00513 for (int32_t j=0; j<m_N; j++)
00514 {
00515 int32_t old_start_idx=start_idx;
00516
00517 while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j)
00518 {
00519 start_idx++;
00520
00521 if (start_idx>1 && start_idx<num_trans)
00522 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00523 }
00524
00525 if (start_idx>1 && start_idx<num_trans)
00526 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00527
00528 int32_t len=start_idx-old_start_idx;
00529 ASSERT(len>=0);
00530
00531 trans_list_forward_cnt[j] = 0 ;
00532
00533 if (len>0)
00534 {
00535 trans_list_forward[j] = SG_MALLOC(T_STATES, len);
00536 trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
00537 trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
00538 }
00539 else
00540 {
00541 trans_list_forward[j] = NULL;
00542 trans_list_forward_val[j] = NULL;
00543 trans_list_forward_id[j] = NULL;
00544 }
00545 }
00546
00547 for (int32_t i=0; i<num_trans; i++)
00548 {
00549 int32_t from_state = (int32_t)a_trans.matrix[i] ;
00550 int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ;
00551 float64_t val = a_trans.matrix[i+num_trans*2] ;
00552 int32_t id = 0 ;
00553 if (num_cols==4)
00554 id = (int32_t)a_trans.matrix[i+num_trans*3] ;
00555
00556
00557 ASSERT(to_state>=0 && to_state<m_N);
00558 ASSERT(from_state>=0 && from_state<m_N);
00559
00560 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00561 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00562 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00563 trans_list_forward_cnt[to_state]++ ;
00564 m_transition_matrix_a.element(from_state, to_state) = val ;
00565 m_transition_matrix_a_id.element(from_state, to_state) = id ;
00566
00567 } ;
00568
00569 m_max_a_id = 0 ;
00570 for (int32_t i=0; i<m_N; i++)
00571 for (int32_t j=0; j<m_N; j++)
00572 {
00573
00574
00575 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00576 }
00577
00578 }
00579
00580
00581 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input)
00582 {
00583
00584
00585
00586
00587
00588
00589 m_svm_arrays_clean=false ;
00590
00591 ASSERT(m_num_svms==mod_words_input.num_rows);
00592 ASSERT(mod_words_input.num_cols==2);
00593
00594 m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ;
00595 m_mod_words_array = m_mod_words.get_array() ;
00596
00597
00598
00599
00600
00601 }
00602
00603 bool CDynProg::check_svm_arrays()
00604 {
00605
00606 if ((m_word_degree.get_dim1()==m_num_degrees) &&
00607 (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00608 (m_num_words.get_dim1()==m_num_degrees) &&
00609
00610
00611
00612
00613
00614
00615 (m_num_unique_words.get_dim1()==m_num_degrees) &&
00616 (m_mod_words.get_dim1()==m_num_svms) &&
00617 (m_mod_words.get_dim2()==2) &&
00618 (m_sign_words.get_dim1()==m_num_svms) &&
00619 (m_string_words.get_dim1()==m_num_svms))
00620 {
00621 m_svm_arrays_clean=true ;
00622 return true ;
00623 }
00624 else
00625 {
00626 if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00627 (m_mod_words.get_dim1()==m_num_svms) &&
00628 (m_mod_words.get_dim2()==2) &&
00629 (m_sign_words.get_dim1()==m_num_svms) &&
00630 (m_string_words.get_dim1()==m_num_svms))
00631 SG_PRINT("OK\n") ;
00632 else
00633 SG_PRINT("not OK\n") ;
00634
00635 if (!(m_word_degree.get_dim1()==m_num_degrees))
00636 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00637 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00638 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00639 if (!(m_num_words.get_dim1()==m_num_degrees))
00640 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00641
00642
00643 if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00644 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00645 if (!(m_mod_words.get_dim1()==m_num_svms))
00646 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00647 if (!(m_mod_words.get_dim2()==2))
00648 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00649 if (!(m_sign_words.get_dim1()==m_num_svms))
00650 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00651 if (!(m_string_words.get_dim1()==m_num_svms))
00652 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00653
00654 m_svm_arrays_clean=false ;
00655 return false ;
00656 }
00657 }
00658
00659 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq)
00660 {
00661 if (seq.num_dims!=3)
00662 SG_ERROR("Expected 3-dimensional Matrix\n");
00663
00664 int32_t N=seq.dims[0];
00665 int32_t cand_pos=seq.dims[1];
00666 int32_t max_num_features=seq.dims[2];
00667
00668 if (!m_svm_arrays_clean)
00669 {
00670 SG_ERROR( "SVM arrays not clean") ;
00671 return ;
00672 } ;
00673
00674 ASSERT(N==m_N);
00675 ASSERT(cand_pos==m_seq_len);
00676 ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00677 ASSERT(m_end_state_distribution_q.get_dim1()==N);
00678
00679 m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ;
00680 }
00681 int32_t CDynProg::get_num_positions()
00682 {
00683 return m_seq_len;
00684 }
00685
00686 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path)
00687 {
00688 ASSERT(seg_path.num_rows==2);
00689 ASSERT(seg_path.num_cols==m_seq_len);
00690
00691 if (seg_path.matrix!=NULL)
00692 {
00693 int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len);
00694 float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len);
00695 for (int32_t i=0; i<m_seq_len; i++)
00696 {
00697 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ;
00698 segment_mask[i] = seg_path.matrix[2*i+1] ;
00699 }
00700 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00701 SG_FREE(segment_ids);
00702 SG_FREE(segment_mask);
00703 }
00704 else
00705 {
00706 int32_t *izeros = SG_MALLOC(int32_t, m_seq_len);
00707 float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len);
00708 for (int32_t i=0; i<m_seq_len; i++)
00709 {
00710 izeros[i]=0 ;
00711 dzeros[i]=0.0 ;
00712 }
00713 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00714 SG_FREE(izeros);
00715 SG_FREE(dzeros);
00716 }
00717 }
00718
00719 void CDynProg::set_pos(SGVector<int32_t> pos)
00720 {
00721 m_pos.set_array(pos.vector, pos.vlen, true, true) ;
00722 m_seq_len = pos.vlen;
00723 }
00724
00725 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info)
00726 {
00727 if (orf_info.num_cols!=2)
00728 SG_ERROR( "orf_info size incorrect %i!=2\n", orf_info.num_cols) ;
00729
00730 m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ;
00731 m_orf_info.set_array_name("orf_info") ;
00732 }
00733
00734 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00735 {
00736 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00737 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00738
00739 SG_UNREF(m_seq_sparse1);
00740 SG_UNREF(m_seq_sparse2);
00741
00742 m_seq_sparse1=seq_sparse1;
00743 m_seq_sparse2=seq_sparse2;
00744 SG_REF(m_seq_sparse1);
00745 SG_REF(m_seq_sparse2);
00746 }
00747
00748 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00749 {
00750 SG_UNREF(m_plif_matrices);
00751
00752 m_plif_matrices=pm;
00753
00754 SG_REF(m_plif_matrices);
00755 }
00756
00757 void CDynProg::set_gene_string(SGVector<char> genestr)
00758 {
00759 ASSERT(genestr.vector);
00760 ASSERT(genestr.vlen>0);
00761
00762 m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ;
00763 }
00764
00765 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00766 {
00767 ASSERT(my_state_seq && m_seq_len>0);
00768 m_my_state_seq.resize_array(m_seq_len);
00769 for (int32_t i=0; i<m_seq_len; i++)
00770 m_my_state_seq[i]=my_state_seq[i];
00771 }
00772
00773 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00774 {
00775 ASSERT(my_pos_seq && m_seq_len>0);
00776 m_my_pos_seq.resize_array(m_seq_len);
00777 for (int32_t i=0; i<m_seq_len; i++)
00778 m_my_pos_seq[i]=my_pos_seq[i];
00779 }
00780
00781 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights)
00782 {
00783 if (m_num_svms!=dictionary_weights.num_cols)
00784 {
00785 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n",
00786 m_num_svms, dictionary_weights.num_cols) ;
00787 }
00788
00789 m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ;
00790
00791
00792 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00793 m_segment_loss.set_const(0) ;
00794 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00795 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00796 m_segment_ids.set_const(0) ;
00797 m_segment_mask.set_const(0) ;
00798 }
00799
00800 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss)
00801 {
00802 int32_t m=segment_loss.num_rows;
00803 int32_t n=segment_loss.num_cols;
00804
00805 if (2*m!=n)
00806 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00807
00808 if (m!=m_max_a_id+1)
00809 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00810
00811 m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ;
00812
00813
00814
00815 }
00816
00817 void CDynProg::best_path_set_segment_ids_mask(
00818 int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00819 {
00820
00821 if (m!=m_observation_matrix.get_dim2())
00822 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2());
00823 int32_t max_id = 0;
00824 for (int32_t i=1;i<m;i++)
00825 max_id = CMath::max(max_id,segment_ids[i]);
00826
00827 m_segment_ids.set_array(segment_ids, m, true, true) ;
00828 m_segment_ids.set_array_name("m_segment_ids");
00829 m_segment_mask.set_array(segment_mask, m, true, true) ;
00830 m_segment_mask.set_array_name("m_segment_mask");
00831
00832 m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00833 m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00834 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00835 }
00836
00837 SGVector<float64_t> CDynProg::get_scores()
00838 {
00839 SGVector<float64_t> scores(m_scores.get_dim1());
00840 memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1()));
00841
00842 return scores;
00843 }
00844
00845 SGMatrix<int32_t> CDynProg::get_states()
00846 {
00847 SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2());
00848
00849 int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() );
00850 memcpy(states.matrix ,m_states.get_array(),sz);
00851
00852 return states;
00853 }
00854
00855 SGMatrix<int32_t> CDynProg::get_positions()
00856 {
00857 SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2());
00858
00859 int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2());
00860 memcpy(positions.matrix, m_positions.get_array(),sz);
00861
00862 return positions;
00863 }
00864
00865 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00866 {
00867 ASSERT(scores && seq_len);
00868
00869 *seq_len=m_my_scores.get_dim1();
00870
00871 int32_t sz = sizeof(float64_t)*(*seq_len);
00872
00873 *scores = SG_MALLOC(float64_t, *seq_len);
00874 ASSERT(*scores);
00875
00876 memcpy(*scores,m_my_scores.get_array(),sz);
00877 }
00878
00879 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00880 {
00881 ASSERT(losses && seq_len);
00882
00883 *seq_len=m_my_losses.get_dim1();
00884
00885 int32_t sz = sizeof(float64_t)*(*seq_len);
00886
00887 *losses = SG_MALLOC(float64_t, *seq_len);
00888 ASSERT(*losses);
00889
00890 memcpy(*losses,m_my_losses.get_array(),sz);
00891 }
00892
00894
00895 bool CDynProg::extend_orf(
00896 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
00897 int32_t to)
00898 {
00899 #ifdef DYNPROG_TIMING_DETAIL
00900 MyTime.start() ;
00901 #endif
00902
00903 if (start<0)
00904 start=0 ;
00905 if (to<0)
00906 to=0 ;
00907
00908 int32_t orf_target = orf_to-orf_from ;
00909 if (orf_target<0) orf_target+=3 ;
00910
00911 int32_t pos ;
00912 if (last_pos==to)
00913 pos = to-orf_to-3 ;
00914 else
00915 pos=last_pos ;
00916
00917 if (pos<0)
00918 {
00919 #ifdef DYNPROG_TIMING_DETAIL
00920 MyTime.stop() ;
00921 orf_time += MyTime.time_diff_sec() ;
00922 #endif
00923 return true ;
00924 }
00925
00926 for (; pos>=start; pos-=3)
00927 if (m_genestr_stop[pos])
00928 {
00929 #ifdef DYNPROG_TIMING_DETAIL
00930 MyTime.stop() ;
00931 orf_time += MyTime.time_diff_sec() ;
00932 #endif
00933 return false ;
00934 }
00935
00936
00937 last_pos = CMath::min(pos+3,to-orf_to-3) ;
00938
00939 #ifdef DYNPROG_TIMING_DETAIL
00940 MyTime.stop() ;
00941 orf_time += MyTime.time_diff_sec() ;
00942 #endif
00943 return true ;
00944 }
00945
00946 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
00947 int16_t nbest, bool with_loss, bool with_multiple_sequences)
00948 {
00949
00950
00951
00952
00953
00954
00955
00956
00957 const float64_t* seq_array = m_observation_matrix.get_array();
00958 m_scores.resize_array(nbest) ;
00959 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00960 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00961
00962 for (int32_t i=0; i<nbest; i++)
00963 {
00964 m_scores[i]=-1;
00965 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
00966 {
00967 m_states.element(i,j)=-1;
00968 m_positions.element(i,j)=-1;
00969 }
00970 }
00971 float64_t* prob_nbest=m_scores.get_array();
00972 int32_t* my_state_seq=m_states.get_array();
00973 int32_t* my_pos_seq=m_positions.get_array();
00974
00975 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
00976 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
00977
00978
00979
00980 #ifdef DYNPROG_TIMING
00981 segment_init_time = 0.0 ;
00982 segment_pos_time = 0.0 ;
00983 segment_extend_time = 0.0 ;
00984 segment_clean_time = 0.0 ;
00985 orf_time = 0.0 ;
00986 svm_init_time = 0.0 ;
00987 svm_pos_time = 0.0 ;
00988 svm_clean_time = 0.0 ;
00989 inner_loop_time = 0.0 ;
00990 content_svm_values_time = 0.0 ;
00991 content_plifs_time = 0.0 ;
00992 inner_loop_max_time = 0.0 ;
00993 long_transition_time = 0.0 ;
00994
00995 MyTime2.start() ;
00996 #endif
00997
00998 if (!m_svm_arrays_clean)
00999 {
01000 SG_ERROR( "SVM arrays not clean") ;
01001 return ;
01002 }
01003
01004 #ifdef DYNPROG_DEBUG
01005 m_transition_matrix_a.set_array_name("transition_matrix");
01006 m_transition_matrix_a.display_array();
01007 m_mod_words.display_array() ;
01008 m_sign_words.display_array() ;
01009 m_string_words.display_array() ;
01010
01011 #endif
01012
01013 int32_t max_look_back = 1000 ;
01014 bool use_svm = false ;
01015
01016 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01017
01018
01019
01020
01021 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ;
01022 PEN.set_array_name("PEN");
01023
01024 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ;
01025 PEN_state_signals.set_array_name("state_signals");
01026
01027 CDynamicArray<float64_t> seq(m_N, m_seq_len) ;
01028 seq.set_array_name("seq") ;
01029 seq.set_const(0) ;
01030
01031 #ifdef DYNPROG_DEBUG
01032 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01033 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01034 SG_PRINT("m_num_svms: %i\n", m_num_svms);
01035 SG_PRINT("m_num_lin_feat_plifs_cum: ");
01036 for (int i=0; i<=m_num_raw_data; i++)
01037 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]);
01038 SG_PRINT("\n");
01039 #endif
01040
01041 float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01042 {
01043 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01044 svm_value[s]=0 ;
01045 }
01046
01047 {
01048
01049
01050 CDynamicArray<float64_t> *seq_input=NULL ;
01051 if (seq_array!=NULL)
01052 {
01053
01054
01055 seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01056 seq_input->set_array_name("seq_input") ;
01057
01058
01059 ASSERT(m_seq_sparse1==NULL) ;
01060 ASSERT(m_seq_sparse2==NULL) ;
01061 } else
01062 {
01063 SG_PRINT("using sparse seq_array\n") ;
01064
01065 ASSERT(m_seq_sparse1!=NULL) ;
01066 ASSERT(m_seq_sparse2!=NULL) ;
01067 ASSERT(max_num_signals==2) ;
01068 }
01069
01070 for (int32_t i=0; i<m_N; i++)
01071 for (int32_t j=0; j<m_seq_len; j++)
01072 seq.element(i,j) = 0 ;
01073
01074 for (int32_t i=0; i<m_N; i++)
01075 for (int32_t j=0; j<m_seq_len; j++)
01076 for (int32_t k=0; k<max_num_signals; k++)
01077 {
01078 if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01079 {
01080
01081 if (seq_input!=NULL)
01082 seq.element(i,j) = seq_input->element(i,j,k) ;
01083 else
01084 {
01085 if (k==0)
01086 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01087 if (k==1)
01088 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01089 }
01090 break ;
01091 }
01092 if (PEN_state_signals.element(i,k)!=NULL)
01093 {
01094 if (seq_input!=NULL)
01095 {
01096
01097 if (CMath::is_finite(seq_input->element(i,j,k)))
01098 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01099 else
01100
01101 seq.element(i,j) = seq_input->element(i, j, k) ;
01102 }
01103 else
01104 {
01105 if (k==0)
01106 {
01107
01108 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01109 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01110 else
01111
01112 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01113 }
01114 if (k==1)
01115 {
01116
01117 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01118 seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01119 else
01120
01121 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01122 }
01123 }
01124 }
01125 else
01126 break ;
01127 }
01128 delete seq_input;
01129 SG_FREE(svm_value);
01130 }
01131
01132
01133 bool long_transitions = m_long_transitions ;
01134 CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ;
01135 long_transition_content_start_position.set_array_name("long_transition_content_start_position");
01136 #ifdef DYNPROG_DEBUG
01137 CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ;
01138 long_transition_content_end_position.set_array_name("long_transition_content_end_position");
01139 #endif
01140 CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ;
01141 long_transition_content_start.set_array_name("long_transition_content_start");
01142
01143 CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ;
01144 long_transition_content_scores.set_array_name("long_transition_content_scores");
01145 #ifdef DYNPROG_DEBUG
01146
01147 CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ;
01148 long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen");
01149
01150 CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ;
01151 long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev");
01152
01153 CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ;
01154 long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem");
01155 #endif
01156 CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ;
01157 long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss");
01158
01159 if (nbest!=1)
01160 {
01161 SG_ERROR("Long transitions are not supported for nbest!=1") ;
01162 long_transitions = false ;
01163 }
01164 long_transition_content_scores.set_const(-CMath::INFTY);
01165 #ifdef DYNPROG_DEBUG
01166 long_transition_content_scores_pen.set_const(0) ;
01167 long_transition_content_scores_elem.set_const(0) ;
01168 long_transition_content_scores_prev.set_const(0) ;
01169 #endif
01170 if (with_loss)
01171 long_transition_content_scores_loss.set_const(0) ;
01172 long_transition_content_start.set_const(0) ;
01173 long_transition_content_start_position.set_const(0) ;
01174 #ifdef DYNPROG_DEBUG
01175 long_transition_content_end_position.set_const(0) ;
01176 #endif
01177
01178 svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01179 {
01180 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01181 svm_value[s]=0 ;
01182 }
01183
01184 CDynamicArray<int32_t> look_back(m_N,m_N) ;
01185 look_back.set_array_name("look_back");
01186
01187
01188
01189
01190 {
01191 for (int32_t i=0; i<m_N; i++)
01192 for (int32_t j=0; j<m_N; j++)
01193 {
01194 look_back.set_element(INT_MAX, i, j) ;
01195
01196 }
01197
01198 for (int32_t j=0; j<m_N; j++)
01199 {
01200
01201 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01202 const T_STATES *elem_list = trans_list_forward[j] ;
01203
01204 for (int32_t i=0; i<num_elem; i++)
01205 {
01206 T_STATES ii = elem_list[i] ;
01207
01208 CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ;
01209 if (penij==NULL)
01210 {
01211 if (long_transitions)
01212 {
01213 look_back.set_element(m_long_transition_threshold, j, ii) ;
01214
01215 }
01216 continue ;
01217 }
01218
01219
01220 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01221 {
01222 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01223
01224 if (CMath::ceil(penij->get_max_value()) > max_look_back)
01225 {
01226 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01227 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01228 }
01229 }
01230 else
01231 {
01232 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01233
01234 }
01235
01236 if (penij->uses_svm_values())
01237 use_svm=true ;
01238 }
01239 }
01240
01241 if (long_transitions)
01242 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01243
01244
01245 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01246
01247 int32_t num_long_transitions = 0 ;
01248 for (int32_t i=0; i<m_N; i++)
01249 for (int32_t j=0; j<m_N; j++)
01250 {
01251 if (look_back.get_element(i,j)==m_long_transition_threshold)
01252 num_long_transitions++ ;
01253 if (look_back.get_element(i,j)==INT_MAX)
01254 {
01255 if (long_transitions)
01256 {
01257 look_back.set_element(m_long_transition_threshold, i, j) ;
01258
01259 }
01260 else
01261 {
01262 look_back.set_element(max_look_back, i, j) ;
01263
01264 }
01265 }
01266 }
01267 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01268 }
01269
01270
01271
01272 SG_DEBUG("use_svm=%i\n", use_svm) ;
01273
01274 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01275 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01276 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290
01291 ASSERT(nbest<32000) ;
01292
01293 CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ;
01294 delta.set_array_name("delta");
01295 float64_t* delta_array = delta.get_array() ;
01296
01297
01298 CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ;
01299 psi.set_array_name("psi");
01300
01301
01302 CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ;
01303 ktable.set_array_name("ktable");
01304
01305
01306 CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ;
01307 ptable.set_array_name("ptable");
01308
01309
01310 CDynamicArray<float64_t> delta_end(nbest) ;
01311 delta_end.set_array_name("delta_end");
01312
01313
01314 CDynamicArray<T_STATES> path_ends(nbest) ;
01315 path_ends.set_array_name("path_ends");
01316
01317
01318 CDynamicArray<int16_t> ktable_end(nbest) ;
01319 ktable_end.set_array_name("ktable_end");
01320
01321
01322 float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen);
01323 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01324 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
01325 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01326
01327 CDynamicArray<float64_t> oldtempvv(look_back_buflen) ;
01328 oldtempvv.set_array_name("oldtempvv");
01329
01330 CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ;
01331 oldtempvv2.set_array_name("oldtempvv2");
01332
01333
01334
01335 CDynamicArray<int32_t> oldtempii(look_back_buflen) ;
01336 oldtempii.set_array_name("oldtempii");
01337
01338 CDynamicArray<int32_t> oldtempii2(look_back_buflen) ;
01339 oldtempii2.set_array_name("oldtempii2");
01340
01341
01342 CDynamicArray<T_STATES> state_seq(m_seq_len) ;
01343 state_seq.set_array_name("state_seq");
01344
01345
01346 CDynamicArray<int32_t> pos_seq(m_seq_len) ;
01347 pos_seq.set_array_name("pos_seq");
01348
01349
01350
01351 m_dict_weights.set_array_name("dict_weights") ;
01352 m_word_degree.set_array_name("word_degree") ;
01353 m_cum_num_words.set_array_name("cum_num_words") ;
01354 m_num_words.set_array_name("num_words") ;
01355
01356
01357
01358 m_num_unique_words.set_array_name("num_unique_words") ;
01359
01360 PEN.set_array_name("PEN") ;
01361 seq.set_array_name("seq") ;
01362
01363 delta.set_array_name("delta") ;
01364 psi.set_array_name("psi") ;
01365 ktable.set_array_name("ktable") ;
01366 ptable.set_array_name("ptable") ;
01367 delta_end.set_array_name("delta_end") ;
01368 path_ends.set_array_name("path_ends") ;
01369 ktable_end.set_array_name("ktable_end") ;
01370
01371 #ifdef USE_TMP_ARRAYCLASS
01372 fixedtempvv.set_array_name("fixedtempvv") ;
01373 fixedtempii.set_array_name("fixedtempvv") ;
01374 #endif
01375
01376 oldtempvv.set_array_name("oldtempvv") ;
01377 oldtempvv2.set_array_name("oldtempvv2") ;
01378 oldtempii.set_array_name("oldtempii") ;
01379 oldtempii2.set_array_name("oldtempii2") ;
01380
01381
01383
01384 #ifdef DYNPROG_DEBUG
01385 state_seq.display_size() ;
01386 pos_seq.display_size() ;
01387
01388 m_dict_weights.display_size() ;
01389 m_word_degree.display_array() ;
01390 m_cum_num_words.display_array() ;
01391 m_num_words.display_array() ;
01392
01393
01394
01395 m_num_unique_words.display_array() ;
01396
01397 PEN.display_size() ;
01398 PEN_state_signals.display_size() ;
01399 seq.display_size() ;
01400 m_orf_info.display_size() ;
01401
01402
01403 delta.display_size() ;
01404 psi.display_size() ;
01405 ktable.display_size() ;
01406 ptable.display_size() ;
01407 delta_end.display_size() ;
01408 path_ends.display_size() ;
01409 ktable_end.display_size() ;
01410
01411 #ifdef USE_TMP_ARRAYCLASS
01412 fixedtempvv.display_size() ;
01413 fixedtempii.display_size() ;
01414 #endif
01415
01416
01417
01418
01419 state_seq.display_size() ;
01420 pos_seq.display_size() ;
01421
01422
01423
01424 #endif //DYNPROG_DEBUG
01425
01427
01428
01429
01430 {
01431 for (int32_t s=0; s<m_num_svms; s++)
01432 ASSERT(m_string_words_array[s]<1) ;
01433 }
01434
01435
01436
01437
01438
01439 {
01440
01441 for (T_STATES i=0; i<m_N; i++)
01442 {
01443
01444 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;
01445 psi.element(0,i,0) = 0 ;
01446 if (nbest>1)
01447 ktable.element(0,i,0) = 0 ;
01448 ptable.element(0,i,0) = 0 ;
01449 for (int16_t k=1; k<nbest; k++)
01450 {
01451 int32_t dim1, dim2, dim3 ;
01452 delta.get_array_size(dim1, dim2, dim3) ;
01453
01454
01455 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ;
01456 psi.element(0,i,0) = 0 ;
01457 if (nbest>1)
01458 ktable.element(0,i,k) = 0 ;
01459 ptable.element(0,i,k) = 0 ;
01460 }
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477
01478
01479 }
01480 }
01481
01482 SG_DEBUG("START_RECURSION \n\n");
01483
01484
01485 for (int32_t t=1; t<m_seq_len; t++)
01486 {
01487
01488
01489
01490
01491 for (T_STATES j=0; j<m_N; j++)
01492 {
01493 if (seq.element(j,t)<=-1e20)
01494 {
01495 for (int16_t k=0; k<nbest; k++)
01496 {
01497 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ;
01498 psi.element(t,j,k) = 0 ;
01499 if (nbest>1)
01500 ktable.element(t,j,k) = 0 ;
01501 ptable.element(t,j,k) = 0 ;
01502 }
01503 }
01504 else
01505 {
01506 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01507 const T_STATES *elem_list = trans_list_forward[j] ;
01508 const float64_t *elem_val = trans_list_forward_val[j] ;
01509 const int32_t *elem_id = trans_list_forward_id[j] ;
01510
01511 int32_t fixed_list_len = 0 ;
01512 float64_t fixedtempvv_ = CMath::INFTY ;
01513 int32_t fixedtempii_ = 0 ;
01514 bool fixedtemplong = false ;
01515
01516 for (int32_t i=0; i<num_elem; i++)
01517 {
01518 T_STATES ii = elem_list[i] ;
01519
01520 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532
01533 int32_t look_back_ = look_back.element(j, ii) ;
01534
01535 int32_t orf_from = m_orf_info.element(ii,0) ;
01536 int32_t orf_to = m_orf_info.element(j,1) ;
01537 if((orf_from!=-1)!=(orf_to!=-1))
01538 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01539 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01540
01541 int32_t orf_target = -1 ;
01542 if (orf_from!=-1)
01543 {
01544 orf_target=orf_to-orf_from ;
01545 if (orf_target<0)
01546 orf_target+=3 ;
01547 ASSERT(orf_target>=0 && orf_target<3) ;
01548 }
01549
01550 int32_t orf_last_pos = m_pos[t] ;
01551 #ifdef DYNPROG_TIMING
01552 MyTime3.start() ;
01553 #endif
01554 int32_t num_ok_pos = 0 ;
01555
01556 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01557 {
01558 bool ok ;
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568 if (orf_target==-1)
01569 ok=true ;
01570 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01571 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01572 else
01573 ok=false ;
01574
01575 if (ok)
01576 {
01577
01578 float64_t segment_loss = 0.0 ;
01579 if (with_loss)
01580 {
01581 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01582
01583
01584 }
01586
01588
01589 int32_t frame = orf_from;
01590 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01591
01592 float64_t pen_val = 0.0 ;
01593 if (penalty)
01594 {
01595 #ifdef DYNPROG_TIMING_DETAIL
01596 MyTime.start() ;
01597 #endif
01598 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01599
01600 #ifdef DYNPROG_TIMING_DETAIL
01601 MyTime.stop() ;
01602 content_plifs_time += MyTime.time_diff_sec() ;
01603 #endif
01604 }
01605
01606 #ifdef DYNPROG_TIMING_DETAIL
01607 MyTime.start() ;
01608 #endif
01609 num_ok_pos++ ;
01610
01611 if (nbest==1)
01612 {
01613 float64_t val = elem_val[i] + pen_val ;
01614 if (with_loss)
01615 val += segment_loss ;
01616
01617 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01618
01619 if (mval<fixedtempvv_)
01620 {
01621 fixedtempvv_ = mval ;
01622 fixedtempii_ = ii + ts*m_N;
01623 fixed_list_len = 1 ;
01624 fixedtemplong = false ;
01625 }
01626 }
01627 else
01628 {
01629 for (int16_t diff=0; diff<nbest; diff++)
01630 {
01631 float64_t val = elem_val[i] ;
01632 val += pen_val ;
01633 if (with_loss)
01634 val += segment_loss ;
01635
01636 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01637
01638
01639
01640
01641
01642 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01643 {
01644 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01645 {
01646 fixedtempvv[fixed_list_len] = mval ;
01647 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01648 fixed_list_len++ ;
01649 }
01650 else
01651 {
01652 int32_t addhere = fixed_list_len;
01653 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01654 addhere--;
01655
01656
01657 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01658 {
01659 fixedtempvv[jj] = fixedtempvv[jj-1];
01660 fixedtempii[jj] = fixedtempii[jj-1];
01661 }
01662
01663 fixedtempvv[addhere] = mval;
01664 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01665
01666 if (fixed_list_len < nbest)
01667 fixed_list_len++;
01668 }
01669 }
01670 }
01671 }
01672 #ifdef DYNPROG_TIMING_DETAIL
01673 MyTime.stop() ;
01674 inner_loop_max_time += MyTime.time_diff_sec() ;
01675 #endif
01676 }
01677 }
01678 #ifdef DYNPROG_TIMING
01679 MyTime3.stop() ;
01680 inner_loop_time += MyTime3.time_diff_sec() ;
01681 #endif
01682 }
01683 for (int32_t i=0; i<num_elem; i++)
01684 {
01685 T_STATES ii = elem_list[i] ;
01686
01687 const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699
01700 int32_t look_back_ = look_back.element(j, ii) ;
01701
01702
01703 int32_t orf_from = m_orf_info.element(ii,0) ;
01704 int32_t orf_to = m_orf_info.element(j,1) ;
01705 if((orf_from!=-1)!=(orf_to!=-1))
01706 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01707 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01708
01709 int32_t orf_target = -1 ;
01710 if (orf_from!=-1)
01711 {
01712 orf_target=orf_to-orf_from ;
01713 if (orf_target<0)
01714 orf_target+=3 ;
01715 ASSERT(orf_target>=0 && orf_target<3) ;
01716 }
01717
01718
01719
01720
01721 #ifdef DYNPROG_TIMING
01722 MyTime3.start() ;
01723 #endif
01724
01725
01726
01727
01728
01729
01730
01731
01732 #ifdef DYNPROG_TIMING
01733 MyTime3.start() ;
01734 #endif
01735
01736 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
01737 {
01738
01739
01740
01741 int32_t start = long_transition_content_start.get_element(ii, j) ;
01742 int32_t end_5p_part = start ;
01743 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
01744 {
01745
01746 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
01747 end_5p_part++ ;
01748
01749 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ;
01750 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ;
01751
01752 float64_t pen_val = 0.0;
01753
01754 if (penalty)
01755 {
01756 int32_t frame = m_orf_info.element(ii,0);
01757 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame);
01758 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
01759 }
01760
01761
01762
01763
01764
01765
01766
01767 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
01768
01769
01770 float64_t segment_loss_part1=0.0 ;
01771 if (with_loss)
01772 {
01773
01774 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part , end_5p_part, elem_id[i]);
01775
01776 mval_trans -= segment_loss_part1 ;
01777 }
01778
01779
01780 if (0)
01781 {
01782
01783
01784
01785
01786 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
01787 long_transition_content_start_position.set_element(0, ii, j) ;
01788 if (with_loss)
01789 long_transition_content_scores_loss.set_element(0.0, ii, j) ;
01790 #ifdef DYNPROG_DEBUG
01791 long_transition_content_scores_pen.set_element(0.0, ii, j) ;
01792 long_transition_content_scores_elem.set_element(0.0, ii, j) ;
01793 long_transition_content_scores_prev.set_element(0.0, ii, j) ;
01794 long_transition_content_end_position.set_element(0, ii, j) ;
01795 #endif
01796 }
01797 if (with_loss)
01798 {
01799 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
01800 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
01801 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
01802 long_transition_content_scores.set_element(score, ii, j) ;
01803 long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
01804 #ifdef DYNPROG_DEBUG
01805 long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01806 #endif
01807
01808 }
01809 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
01810 {
01811
01812 long_transition_content_scores.set_element(-mval_trans, ii, j) ;
01813 long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
01814 if (with_loss)
01815 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
01816 #ifdef DYNPROG_DEBUG
01817 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
01818 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
01819 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
01820
01821
01822
01823 long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01824 #endif
01825 }
01826
01827
01828
01829
01830 long_transition_content_start.set_element(start_5p_part, ii, j) ;
01831 }
01832
01833
01834
01835
01836
01837
01838
01839 int ts = t;
01840 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
01841 ts-- ;
01842
01843 if (ts>0)
01844 {
01845 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
01846
01847
01848
01849 float pen_val_3p = 0.0 ;
01850 if (penalty)
01851 {
01852 int32_t frame = orf_from ;
01853 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01854 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01855 }
01856
01857 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
01858
01859 {
01860 #ifdef DYNPROG_DEBUG
01861 float64_t segment_loss_part2=0.0 ;
01862 float64_t segment_loss_part1=0.0 ;
01863 #endif
01864 float64_t segment_loss_total=0.0 ;
01865
01866 if (with_loss)
01867 {
01868
01869
01870 #ifdef DYNPROG_DEBUG
01871
01872 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
01873
01874 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
01875 #endif
01876 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
01877 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
01878 }
01879
01880 #ifdef DYNPROG_DEBUG
01881 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
01882 {
01883 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
01884 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
01885 long_transition_content_scores.get_element(ii, j),
01886 long_transition_content_scores_pen.get_element(ii, j),
01887 long_transition_content_scores_prev.get_element(ii, j),
01888 long_transition_content_scores_elem.get_element(ii, j),
01889 long_transition_content_scores_loss.get_element(ii, j),
01890 m_pos[long_transition_content_start_position.get_element(ii,j)],
01891 m_pos[long_transition_content_end_position.get_element(ii,j)],
01892 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
01893 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
01894 }
01895
01896 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
01897 {
01898 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
01899 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
01900 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
01901 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
01902 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
01903 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
01904 }
01905 #endif
01906 }
01907
01908
01909
01910
01911
01912
01913 if (mval < fixedtempvv_)
01914 {
01915
01916 int32_t fromtjk = fixedtempii_ ;
01917
01918
01919
01920
01921
01922 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
01923
01924 fixedtempvv_ = mval ;
01925 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
01926 fixed_list_len = 1 ;
01927 fixedtemplong = true ;
01928 }
01929 }
01930 }
01931 }
01932 #ifdef DYNPROG_TIMING
01933 MyTime3.stop() ;
01934 long_transition_time += MyTime3.time_diff_sec() ;
01935 #endif
01936
01937
01938 int32_t numEnt = fixed_list_len;
01939
01940 float64_t minusscore;
01941 int64_t fromtjk;
01942
01943 for (int16_t k=0; k<nbest; k++)
01944 {
01945 if (k<numEnt)
01946 {
01947 if (nbest==1)
01948 {
01949 minusscore = fixedtempvv_ ;
01950 fromtjk = fixedtempii_ ;
01951 }
01952 else
01953 {
01954 minusscore = fixedtempvv[k];
01955 fromtjk = fixedtempii[k];
01956 }
01957
01958 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t);
01959 psi.element(t,j,k) = (fromtjk%m_N) ;
01960 if (nbest>1)
01961 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
01962 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
01963 }
01964 else
01965 {
01966 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ;
01967 psi.element(t,j,k) = 0 ;
01968 if (nbest>1)
01969 ktable.element(t,j,k) = 0 ;
01970 ptable.element(t,j,k) = 0 ;
01971 }
01972 }
01973 }
01974 }
01975 }
01976 {
01977 int32_t list_len = 0 ;
01978 for (int16_t diff=0; diff<nbest; diff++)
01979 {
01980 for (T_STATES i=0; i<m_N; i++)
01981 {
01982 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
01983 oldtempii[list_len] = i + diff*m_N ;
01984 list_len++ ;
01985 }
01986 }
01987
01988 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
01989
01990 for (int16_t k=0; k<nbest; k++)
01991 {
01992 delta_end.element(k) = -oldtempvv[k] ;
01993 path_ends.element(k) = (oldtempii[k]%m_N) ;
01994 if (nbest>1)
01995 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
01996 }
01997
01998
01999 }
02000
02001 {
02002 for (int16_t k=0; k<nbest; k++)
02003 {
02004 prob_nbest[k]= delta_end.element(k) ;
02005
02006 int32_t i = 0 ;
02007 state_seq[i] = path_ends.element(k) ;
02008 int16_t q = 0 ;
02009 if (nbest>1)
02010 q=ktable_end.element(k) ;
02011 pos_seq[i] = m_seq_len-1 ;
02012
02013 while (pos_seq[i]>0)
02014 {
02015 ASSERT(i+1<m_seq_len);
02016
02017 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02018 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
02019 if (nbest>1)
02020 q = ktable.element(pos_seq[i], state_seq[i], q) ;
02021 i++ ;
02022 }
02023
02024 int32_t num_states = i+1 ;
02025 for (i=0; i<num_states;i++)
02026 {
02027 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02028 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ;
02029 }
02030 if (num_states<m_seq_len)
02031 {
02032 my_state_seq[num_states+k*m_seq_len]=-1 ;
02033 my_pos_seq[num_states+k*m_seq_len]=-1 ;
02034 }
02035 }
02036 }
02037
02038
02039
02040
02041
02042 #ifdef DYNPROG_TIMING
02043 MyTime2.stop() ;
02044
02045
02046 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02047 #endif
02048
02049 SG_FREE(fixedtempvv);
02050 SG_FREE(fixedtempii);
02051 }
02052
02053
02054 void CDynProg::best_path_trans_deriv(
02055 int32_t *my_state_seq, int32_t *my_pos_seq,
02056 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02057 {
02058 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02059 m_end_state_distribution_q_deriv.resize_array(m_N) ;
02060 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02061
02062
02063 m_my_scores.resize_array(my_seq_len);
02064 m_my_losses.resize_array(my_seq_len);
02065 float64_t* my_scores=m_my_scores.get_array();
02066 float64_t* my_losses=m_my_losses.get_array();
02067 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02068 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02069
02070 if (!m_svm_arrays_clean)
02071 {
02072 SG_ERROR( "SVM arrays not clean") ;
02073 return ;
02074 } ;
02075
02076
02077
02078
02079
02080 bool use_svm = false ;
02081
02082 CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ;
02083 PEN.set_array_name("PEN");
02084
02085 CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ;
02086 PEN_state_signals.set_array_name("PEN_state_signals");
02087
02088 CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02089 seq_input.set_array_name("seq_input");
02090
02091 {
02092 for (int32_t i=0; i<m_N; i++)
02093 for (int32_t j=0; j<m_N; j++)
02094 {
02095 CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ;
02096 if (penij==NULL)
02097 continue ;
02098
02099 if (penij->uses_svm_values())
02100 use_svm=true ;
02101 penij->penalty_clear_derivative() ;
02102 }
02103 for (int32_t i=0; i<m_N; i++)
02104 for (int32_t j=0; j<max_num_signals; j++)
02105 {
02106 CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ;
02107 if (penij==NULL)
02108 continue ;
02109 if (penij->uses_svm_values())
02110 use_svm=true ;
02111 penij->penalty_clear_derivative() ;
02112 }
02113 }
02114
02115 {
02116
02117 for (int32_t i=0; i<m_N; i++)
02118 {
02119 m_initial_state_distribution_p_deriv.element(i)=0 ;
02120 m_end_state_distribution_q_deriv.element(i)=0 ;
02121 for (int32_t j=0; j<m_N; j++)
02122 m_transition_matrix_a_deriv.element(i,j)=0 ;
02123 }
02124 }
02125
02126 {
02127 for (int32_t i=0; i<my_seq_len; i++)
02128 {
02129 my_scores[i]=0.0 ;
02130 my_losses[i]=0.0 ;
02131 }
02132 }
02133
02134
02135
02136
02137
02138
02139
02140 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02141 float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02142 float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02143 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02144 {
02145 svm_value[s]=0 ;
02146 svm_value_part1[s]=0 ;
02147 svm_value_part2[s]=0 ;
02148 }
02149
02150
02151 float64_t total_score = 0.0 ;
02152 float64_t total_loss = 0.0 ;
02153
02154
02155 ASSERT(my_state_seq[0]>=0) ;
02156 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02157 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02158
02159 ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02160 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02161 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02162
02163
02164 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02165
02166
02167 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02168 for (int32_t i=0; i<my_seq_len-1; i++)
02169 {
02170 if (my_state_seq[i+1]==-1)
02171 break ;
02172 int32_t from_state = my_state_seq[i] ;
02173 int32_t to_state = my_state_seq[i+1] ;
02174 int32_t from_pos = my_pos_seq[i] ;
02175 int32_t to_pos = my_pos_seq[i+1] ;
02176
02177 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02178 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02179
02180 #ifdef DYNPROG_DEBUG
02181
02182
02183 if (i>0)
02184 {
02185 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02186 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02187 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02188 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02189 if (CMath::abs(loss1+loss2-loss3)>0)
02190 {
02191 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02192 }
02193 }
02194 io->set_loglevel(M_DEBUG) ;
02195 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02196 #endif
02197
02198 m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02199 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02200
02201 #ifdef DYNPROG_DEBUG
02202 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02203 #endif
02204
02205
02206
02207
02208
02209 bool is_long_transition = false ;
02210 if (m_long_transitions)
02211 {
02212 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02213 is_long_transition = true ;
02214 if (m_orf_info.element(from_state,0)!=-1)
02215 is_long_transition = false ;
02216 }
02217
02218 int32_t from_pos_thresh = from_pos ;
02219 int32_t to_pos_thresh = to_pos ;
02220
02221 if (use_svm)
02222 {
02223 if (is_long_transition)
02224 {
02225
02226 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold)
02227 from_pos_thresh++ ;
02228 ASSERT(from_pos_thresh<to_pos) ;
02229 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold);
02230 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);
02231
02232 int32_t frame = m_orf_info.element(from_state,0);
02233 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02234
02235 #ifdef DYNPROG_DEBUG
02236 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02237 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02238 SG_PRINT("%1.4f ", svm_value_part1[s]);
02239 SG_PRINT("\n");
02240 #endif
02241
02242 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold)
02243 to_pos_thresh-- ;
02244 ASSERT(to_pos_thresh>0) ;
02245 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ;
02246 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ;
02247
02248 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02249
02250 #ifdef DYNPROG_DEBUG
02251 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02252 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02253 SG_PRINT("%1.4f ", svm_value_part2[s]);
02254 SG_PRINT("\n");
02255 #endif
02256 }
02257 else
02258 {
02259
02260
02261
02262 int32_t frame = m_orf_info.element(from_state,0);
02263 if (false)
02264 {
02265 int32_t num_current_svms=0;
02266 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02267 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame);
02268 ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
02269 SG_PRINT("\n");
02270 }
02271
02272 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02273 #ifdef DYNPROG_DEBUG
02274 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02275 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02276 SG_PRINT("%1.4f ", svm_value[s]);
02277 SG_PRINT("\n");
02278 #endif
02279 }
02280 }
02281
02282 if (PEN.element(to_state, from_state)!=NULL)
02283 {
02284 float64_t nscore = 0 ;
02285 if (is_long_transition)
02286 {
02287 float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02288 float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02289 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02290 }
02291 else
02292 nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02293
02294 if (false)
02295 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
02296 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
02297
02298 my_scores[i] += nscore ;
02299
02300 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
02301 {
02302 svm_value[s]=-CMath::INFTY;
02303 svm_value_part1[s]=-CMath::INFTY;
02304 svm_value_part2[s]=-CMath::INFTY;
02305 }
02306
02307 #ifdef DYNPROG_DEBUG
02308
02309 #endif
02310 if (is_long_transition)
02311 {
02312 #ifdef DYNPROG_DEBUG
02313 float64_t sum_score = 0.0 ;
02314
02315 for (int kk=0; kk<i; kk++)
02316 sum_score += my_scores[i] ;
02317
02318 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
02319 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
02320 nscore, sum_score,
02321 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
02322 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02323 #endif
02324 }
02325
02326 if (is_long_transition)
02327 {
02328 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02329 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02330 }
02331 else
02332 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02333
02334
02335
02336
02337
02338
02339
02340 if (is_long_transition)
02341 {
02342 for (int32_t d=1; d<=m_num_raw_data; d++)
02343 {
02344 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02345 svm_value[s]=-CMath::INFTY;
02346 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02347 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02348 for (int32_t k=0;k<num_intensities;k++)
02349 {
02350 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02351 svm_value[j]=intensities[k];
02352
02353 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02354
02355 }
02356 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02357 for (int32_t k=0;k<num_intensities;k++)
02358 {
02359 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02360 svm_value[j]=intensities[k];
02361
02362 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02363
02364 }
02365 SG_FREE(intensities);
02366
02367 }
02368 }
02369 else
02370 {
02371 for (int32_t d=1; d<=m_num_raw_data; d++)
02372 {
02373 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02374 svm_value[s]=-CMath::INFTY;
02375 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02376 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02377
02378 for (int32_t k=0;k<num_intensities;k++)
02379 {
02380 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02381 svm_value[j]=intensities[k];
02382
02383 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
02384
02385 }
02386 SG_FREE(intensities);
02387 }
02388 }
02389
02390 }
02391 #ifdef DYNPROG_DEBUG
02392 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02393 #endif
02394
02395
02396 for (int32_t k=0; k<max_num_signals; k++)
02397 {
02398 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02399 {
02400 #ifdef DYNPROG_DEBUG
02401 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02402 #endif
02403 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02404
02405
02406 break ;
02407 }
02408 if (PEN_state_signals.element(to_state, k)!=NULL)
02409 {
02410 float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
02411 my_scores[i] += nscore ;
02412 #ifdef DYNPROG_DEBUG
02413 if (false)
02414 {
02415 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
02416 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
02417 for (int x=0; x<23; x++)
02418 {
02419 for (int i=-10; i<10; i++)
02420 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k));
02421 SG_PRINT("\n");
02422 }
02423
02424 }
02425 #endif
02426
02427
02428
02429
02430
02431
02432
02433
02434 #ifdef DYNPROG_DEBUG
02435 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02436 #endif
02437 ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ;
02438 } else
02439 break ;
02440 }
02441
02442
02443
02444
02445 total_score += my_scores[i] ;
02446 total_loss += my_losses[i] ;
02447
02448 }
02449
02450
02451
02452
02453 SG_FREE(svm_value);
02454 SG_FREE(svm_value_part1);
02455 SG_FREE(svm_value_part2);
02456 }
02457
02458 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02459 {
02460 ASSERT(from_pos<to_pos);
02461 int32_t num_intensities = 0;
02462 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]];
02463 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02464 int32_t last_pos;
02465 int32_t num = m_num_probes_cum[type-1];
02466 while (*p_tiling_pos<to_pos)
02467 {
02468 if (*p_tiling_pos>=from_pos)
02469 {
02470 intensities[num_intensities] = *p_tiling_data;
02471 num_intensities++;
02472 }
02473 num++;
02474 if (num>=m_num_probes_cum[type])
02475 break;
02476 last_pos = *p_tiling_pos;
02477 p_tiling_pos++;
02478 p_tiling_data++;
02479 ASSERT(last_pos<*p_tiling_pos);
02480 }
02481 return num_intensities;
02482 }
02483
02484 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02485 {
02486 #ifdef DYNPROG_TIMING_DETAIL
02487 MyTime.start() ;
02488 #endif
02489
02490
02491
02492 for (int32_t i=0;i<m_num_svms;i++)
02493 {
02494 float64_t to_val = m_lin_feat.get_element(i, to_state);
02495 float64_t from_val = m_lin_feat.get_element(i, from_state);
02496 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02497 }
02498 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02499 {
02500 float64_t to_val = m_lin_feat.get_element(i, to_state);
02501 float64_t from_val = m_lin_feat.get_element(i, from_state);
02502 svm_values[i] = to_val-from_val ;
02503 }
02504 if (m_intron_list)
02505 {
02506 int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs);
02507 m_intron_list->get_intron_support(support, from_state, to_state);
02508 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02509 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
02510 int32_t cnt = 0;
02511 for (int32_t i=intron_list_start; i<intron_list_end;i++)
02512 {
02513 svm_values[i] = (float64_t) (support[cnt]);
02514 cnt++;
02515 }
02516
02517
02518 SG_FREE(support);
02519 }
02520
02521 if (frame!=-1)
02522 {
02523 svm_values[frame_plifs[0]] = 1e10;
02524 svm_values[frame_plifs[1]] = 1e10;
02525 svm_values[frame_plifs[2]] = 1e10;
02526 int32_t global_frame = from_pos%3;
02527 int32_t row = ((global_frame+frame)%3)+4;
02528 float64_t to_val = m_lin_feat.get_element(row, to_state);
02529 float64_t from_val = m_lin_feat.get_element(row, from_state);
02530 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02531 }
02532 #ifdef DYNPROG_TIMING_DETAIL
02533 MyTime.stop() ;
02534 content_svm_values_time += MyTime.time_diff_sec() ;
02535 #endif
02536 }
02537 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02538 {
02539 m_intron_list = intron_list;
02540 m_num_intron_plifs = num_plifs;
02541 }
02542