00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <shogun/structure/DynProg.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/lib/config.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/Alphabet.h>
00019 #include <shogun/structure/Plif.h>
00020 #include <shogun/structure/IntronList.h>
00021 #include <shogun/lib/Array.h>
00022 #include <shogun/lib/Array2.h>
00023 #include <shogun/lib/Array3.h>
00024
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029
00030 using namespace shogun;
00031
00032
00033
00034
00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00037 int32_t CDynProg::frame_plifs[3]={4,5,6};
00038 int32_t CDynProg::num_words_default[4]= {64,256,1024,4096} ;
00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00040 1,1,1,1,1,1,1,1,
00041 0,0,0,0,0,0,0,0,
00042 0,0,0,0,0,0,0,0} ;
00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00044 false,false,false,false,false,false,false,false} ;
00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00046 1,1,1,1,1,1,1,1} ;
00047
00048 CDynProg::CDynProg(int32_t num_svms )
00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00050 m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00051 m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00052 m_end_state_distribution_q_deriv(1),
00053
00054
00055 m_num_degrees(4),
00056 m_num_svms(num_svms),
00057 m_word_degree(word_degree_default, m_num_degrees, true, true),
00058 m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00059 m_cum_num_words_array(m_cum_num_words.get_array()),
00060 m_num_words(num_words_default, m_num_degrees, true, true),
00061 m_num_words_array(m_num_words.get_array()),
00062 m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00063 m_mod_words_array(m_mod_words.get_array()),
00064 m_sign_words(sign_words_default, m_num_svms, true, true),
00065 m_sign_words_array(m_sign_words.get_array()),
00066 m_string_words(string_words_default, m_num_svms, true, true),
00067 m_string_words_array(m_string_words.get_array()),
00068
00069 m_num_unique_words(m_num_degrees),
00070 m_svm_arrays_clean(true),
00071
00072 m_max_a_id(0), m_observation_matrix(1,1,1),
00073 m_pos(1),
00074 m_seq_len(0),
00075 m_orf_info(1,2),
00076 m_plif_list(1),
00077 m_PEN(1,1),
00078 m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
00079 m_segment_ids(1),
00080 m_segment_mask(1),
00081 m_my_state_seq(1),
00082 m_my_pos_seq(1),
00083 m_my_scores(1),
00084 m_my_losses(1),
00085 m_scores(1),
00086 m_states(1,1),
00087 m_positions(1,1),
00088
00089 m_seq_sparse1(NULL),
00090 m_seq_sparse2(NULL),
00091 m_plif_matrices(NULL),
00092
00093 m_genestr_stop(1),
00094 m_intron_list(NULL),
00095 m_num_intron_plifs(0),
00096 m_lin_feat(1,1),
00097 m_raw_intensities(NULL),
00098 m_probe_pos(NULL),
00099 m_num_probes_cum(NULL),
00100 m_num_lin_feat_plifs_cum(NULL),
00101 m_num_raw_data(0),
00102
00103 m_long_transitions(true),
00104 m_long_transition_threshold(1000)
00105 {
00106 trans_list_forward = NULL ;
00107 trans_list_forward_cnt = NULL ;
00108 trans_list_forward_val = NULL ;
00109 trans_list_forward_id = NULL ;
00110 trans_list_len = 0 ;
00111
00112 mem_initialized = true ;
00113
00114 m_N=1;
00115
00116 m_raw_intensities = NULL;
00117 m_probe_pos = NULL;
00118 m_num_probes_cum = SG_MALLOC(int32_t, 100);
00119 m_num_probes_cum[0] = 0;
00120
00121 m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100);
00122 m_num_lin_feat_plifs_cum[0] = m_num_svms;
00123 m_num_raw_data = 0;
00124 #ifdef ARRAY_STATISTICS
00125 m_word_degree.set_array_name("word_degree");
00126 #endif
00127
00128 m_transition_matrix_a_id.set_array_name("transition_matrix_a_id");
00129 m_transition_matrix_a.set_array_name("transition_matrix_a");
00130 m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv");
00131 m_mod_words.set_array_name("mod_words");
00132 m_orf_info.set_array_name("orf_info");
00133 m_segment_sum_weights.set_array_name("segment_sum_weights");
00134 m_PEN.set_array_name("PEN");
00135 m_PEN_state_signals.set_array_name("PEN_state_signals");
00136 m_dict_weights.set_array_name("dict_weights");
00137 m_states.set_array_name("states");
00138 m_positions.set_array_name("positions");
00139 m_lin_feat.set_array_name("lin_feat");
00140
00141
00142 m_observation_matrix.set_array_name("m_observation_matrix");
00143 m_segment_loss.set_array_name("m_segment_loss");
00144 m_seg_loss_obj = new CSegmentLoss();
00145 }
00146
00147 CDynProg::~CDynProg()
00148 {
00149 if (trans_list_forward_cnt)
00150 SG_FREE(trans_list_forward_cnt);
00151 if (trans_list_forward)
00152 {
00153 for (int32_t i=0; i<trans_list_len; i++)
00154 {
00155 if (trans_list_forward[i])
00156 SG_FREE(trans_list_forward[i]);
00157 }
00158 SG_FREE(trans_list_forward);
00159 }
00160 if (trans_list_forward_val)
00161 {
00162 for (int32_t i=0; i<trans_list_len; i++)
00163 {
00164 if (trans_list_forward_val[i])
00165 SG_FREE(trans_list_forward_val[i]);
00166 }
00167 SG_FREE(trans_list_forward_val);
00168 }
00169 if (trans_list_forward_id)
00170 {
00171 for (int32_t i=0; i<trans_list_len; i++)
00172 {
00173 if (trans_list_forward_id[i])
00174 SG_FREE(trans_list_forward_id[i]);
00175 }
00176 SG_FREE(trans_list_forward_id);
00177 }
00178 if (m_raw_intensities)
00179 SG_FREE(m_raw_intensities);
00180 if (m_probe_pos)
00181 SG_FREE(m_probe_pos);
00182 if (m_num_probes_cum)
00183 SG_FREE(m_num_probes_cum);
00184 if (m_num_lin_feat_plifs_cum)
00185 SG_FREE(m_num_lin_feat_plifs_cum);
00186
00187 delete m_intron_list;
00188
00189 SG_UNREF(m_seq_sparse1);
00190 SG_UNREF(m_seq_sparse2);
00191 SG_UNREF(m_plif_matrices);
00192 }
00193
00195 int32_t CDynProg::get_num_svms()
00196 {
00197 return m_num_svms;
00198 }
00199
00200 void CDynProg::precompute_stop_codons()
00201 {
00202 int32_t length=m_genestr.get_dim1();
00203
00204 m_genestr_stop.resize_array(length) ;
00205 m_genestr_stop.zero() ;
00206 m_genestr_stop.set_array_name("genestr_stop") ;
00207 {
00208 for (int32_t i=0; i<length-2; i++)
00209 if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
00210 (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
00211 (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00212 ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00213 {
00214 m_genestr_stop.element(i)=true ;
00215 }
00216 else
00217 m_genestr_stop.element(i)=false ;
00218 m_genestr_stop.element(length-2)=false ;
00219 m_genestr_stop.element(length-1)=false ;
00220 }
00221 }
00222
00223 void CDynProg::set_num_states(int32_t p_N)
00224 {
00225 m_N=p_N ;
00226
00227 m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00228 m_transition_matrix_a.resize_array(m_N,m_N) ;
00229 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00230 m_initial_state_distribution_p.resize_array(m_N) ;
00231 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00232 m_end_state_distribution_q.resize_array(m_N);
00233 m_end_state_distribution_q_deriv.resize_array(m_N) ;
00234
00235 m_orf_info.resize_array(m_N,2) ;
00236 m_PEN.resize_array(m_N,m_N) ;
00237 }
00238
00239 int32_t CDynProg::get_num_states()
00240 {
00241 return m_N;
00242 }
00243
00244 void CDynProg::init_tiling_data(
00245 int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00246 {
00247 m_num_raw_data++;
00248 m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00249
00250 int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]);
00251 float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]);
00252
00253
00254 if (m_num_raw_data==1){
00255 memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00256 memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00257
00258 }else{
00259 memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00260 memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00261 memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00262 memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00263 }
00264 SG_FREE(m_probe_pos);
00265 SG_FREE(m_raw_intensities);
00266 m_probe_pos = tmp_probe_pos;
00267 m_raw_intensities = tmp_raw_intensities;
00268
00269
00270
00271
00272 }
00273
00274 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00275 {
00276 m_lin_feat.resize_array(p_num_svms, m_seq_len);
00277
00278
00279 for (int s=0; s<p_num_svms; s++)
00280 for (int p=0; p<m_seq_len; p++)
00281 m_lin_feat.set_element(0.0, s, p) ;
00282 }
00283
00284 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00285 {
00286 int32_t dim1, dim2;
00287 m_lin_feat.get_array_size(dim1, dim2);
00288 ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00289 ASSERT(dim2==m_seq_len);
00290
00291
00292
00293 float64_t* arr = m_lin_feat.get_array();
00294 float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2);
00295 memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00296 for(int32_t j=0;j<m_seq_len;j++)
00297 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00298 tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00299
00300 m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);
00301 SG_FREE(tmp);
00302
00303
00304
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315 }
00316
00317 void CDynProg::precompute_tiling_plifs(
00318 CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00319 {
00320 m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00321 float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs);
00322 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
00323 for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00324 svm_value[i]=0.0;
00325 int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
00326 for (int32_t i=0; i<num_tiling_plifs; i++)
00327 {
00328 tiling_plif[i]=0.0;
00329 CPlif * plif = PEN[tiling_plif_ids[i]];
00330 tiling_rows[i] = plif->get_use_svm();
00331
00332 ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00333 }
00334 resize_lin_feat(num_tiling_plifs);
00335
00336
00337 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00338 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00339 int32_t num=m_num_probes_cum[m_num_raw_data-1];
00340
00341 for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00342 {
00343 while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00344 {
00345 for (int32_t i=0; i<num_tiling_plifs; i++)
00346 {
00347 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00348 CPlif * plif = PEN[tiling_plif_ids[i]];
00349 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00350 plif->set_do_calc(true);
00351 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00352 plif->set_do_calc(false);
00353 }
00354 p_tiling_data++;
00355 p_tiling_pos++;
00356 num++;
00357 }
00358 for (int32_t i=0; i<num_tiling_plifs; i++)
00359 m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00360 }
00361 SG_FREE(svm_value);
00362 SG_FREE(tiling_plif);
00363 SG_FREE(tiling_rows);
00364 }
00365
00366 void CDynProg::create_word_string()
00367 {
00368 SG_FREE(m_wordstr);
00369 m_wordstr=SG_MALLOC(uint16_t**, 5440);
00370 int32_t k=0;
00371 int32_t genestr_len=m_genestr.get_dim1();
00372
00373 m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees);
00374 for (int32_t j=0; j<m_num_degrees; j++)
00375 {
00376 m_wordstr[k][j]=NULL ;
00377 {
00378 m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
00379 for (int32_t i=0; i<genestr_len; i++)
00380 switch (m_genestr[i])
00381 {
00382 case 'A':
00383 case 'a': m_wordstr[k][j][i]=0 ; break ;
00384 case 'C':
00385 case 'c': m_wordstr[k][j][i]=1 ; break ;
00386 case 'G':
00387 case 'g': m_wordstr[k][j][i]=2 ; break ;
00388 case 'T':
00389 case 't': m_wordstr[k][j][i]=3 ; break ;
00390 default: ASSERT(0) ;
00391 }
00392 CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00393 }
00394 }
00395 }
00396
00397 void CDynProg::precompute_content_values()
00398 {
00399 for (int32_t s=0; s<m_num_svms; s++)
00400 m_lin_feat.set_element(0.0, s, 0);
00401
00402 for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00403 {
00404 int32_t from_pos = m_pos[p];
00405 int32_t to_pos = m_pos[p+1];
00406 float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms);
00407
00408
00409 ASSERT(from_pos<=m_genestr.get_dim1());
00410 ASSERT(to_pos<=m_genestr.get_dim1());
00411
00412 for (int32_t s=0; s<m_num_svms; s++)
00413 my_svm_values_unnormalized[s]=0.0;
00414
00415 for (int32_t i=from_pos; i<to_pos; i++)
00416 {
00417 for (int32_t j=0; j<m_num_degrees; j++)
00418 {
00419 uint16_t word = m_wordstr[0][j][i] ;
00420 for (int32_t s=0; s<m_num_svms; s++)
00421 {
00422
00423 if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00424 continue;
00425 my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00426 }
00427 }
00428 }
00429 for (int32_t s=0; s<m_num_svms; s++)
00430 {
00431 float64_t prev = m_lin_feat.get_element(s, p);
00432
00433 if (prev<-1e20 || prev>1e20)
00434 {
00435 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00436 prev=0 ;
00437 }
00438 m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00439 }
00440 SG_FREE(my_svm_values_unnormalized);
00441 }
00442
00443
00444
00445 }
00446
00447 void CDynProg::set_p_vector(SGVector<float64_t> p)
00448 {
00449 if (!(p.vlen==m_N))
00450 SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N);
00451
00452 m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true);
00453 }
00454
00455 void CDynProg::set_q_vector(SGVector<float64_t> q)
00456 {
00457 if (!(q.vlen==m_N))
00458 SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N);
00459 m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true);
00460 }
00461
00462 void CDynProg::set_a(SGMatrix<float64_t> a)
00463 {
00464 ASSERT(a.num_cols==m_N);
00465 ASSERT(a.num_rows==m_N);
00466 m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true);
00467 m_transition_matrix_a_deriv.resize_array(m_N, m_N);
00468 }
00469
00470 void CDynProg::set_a_id(SGMatrix<int32_t> a)
00471 {
00472 ASSERT(a.num_cols==m_N);
00473 ASSERT(a.num_rows==m_N);
00474 m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true);
00475 m_max_a_id = 0;
00476 for (int32_t i=0; i<m_N; i++)
00477 {
00478 for (int32_t j=0; j<m_N; j++)
00479 m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00480 }
00481 }
00482
00483 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans)
00484 {
00485 int32_t num_trans=a_trans.num_rows;
00486 int32_t num_cols=a_trans.num_cols;
00487
00488
00489
00490 if (!((num_cols==3) || (num_cols==4)))
00491 SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00492
00493 SG_FREE(trans_list_forward);
00494 SG_FREE(trans_list_forward_cnt);
00495 SG_FREE(trans_list_forward_val);
00496 SG_FREE(trans_list_forward_id);
00497
00498 trans_list_forward = NULL ;
00499 trans_list_forward_cnt = NULL ;
00500 trans_list_forward_val = NULL ;
00501 trans_list_len = 0 ;
00502
00503 m_transition_matrix_a.zero() ;
00504 m_transition_matrix_a_id.zero() ;
00505
00506 mem_initialized = true ;
00507
00508 trans_list_forward_cnt=NULL ;
00509 trans_list_len = m_N ;
00510 trans_list_forward = SG_MALLOC(T_STATES*, m_N);
00511 trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N);
00512 trans_list_forward_val = SG_MALLOC(float64_t*, m_N);
00513 trans_list_forward_id = SG_MALLOC(int32_t*, m_N);
00514
00515 int32_t start_idx=0;
00516 for (int32_t j=0; j<m_N; j++)
00517 {
00518 int32_t old_start_idx=start_idx;
00519
00520 while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j)
00521 {
00522 start_idx++;
00523
00524 if (start_idx>1 && start_idx<num_trans)
00525 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00526 }
00527
00528 if (start_idx>1 && start_idx<num_trans)
00529 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00530
00531 int32_t len=start_idx-old_start_idx;
00532 ASSERT(len>=0);
00533
00534 trans_list_forward_cnt[j] = 0 ;
00535
00536 if (len>0)
00537 {
00538 trans_list_forward[j] = SG_MALLOC(T_STATES, len);
00539 trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
00540 trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
00541 }
00542 else
00543 {
00544 trans_list_forward[j] = NULL;
00545 trans_list_forward_val[j] = NULL;
00546 trans_list_forward_id[j] = NULL;
00547 }
00548 }
00549
00550 for (int32_t i=0; i<num_trans; i++)
00551 {
00552 int32_t from_state = (int32_t)a_trans.matrix[i] ;
00553 int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ;
00554 float64_t val = a_trans.matrix[i+num_trans*2] ;
00555 int32_t id = 0 ;
00556 if (num_cols==4)
00557 id = (int32_t)a_trans.matrix[i+num_trans*3] ;
00558
00559
00560 ASSERT(to_state>=0 && to_state<m_N);
00561 ASSERT(from_state>=0 && from_state<m_N);
00562
00563 trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00564 trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00565 trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00566 trans_list_forward_cnt[to_state]++ ;
00567 m_transition_matrix_a.element(from_state, to_state) = val ;
00568 m_transition_matrix_a_id.element(from_state, to_state) = id ;
00569
00570 } ;
00571
00572 m_max_a_id = 0 ;
00573 for (int32_t i=0; i<m_N; i++)
00574 for (int32_t j=0; j<m_N; j++)
00575 {
00576
00577
00578 m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00579 }
00580
00581 }
00582
00583
00584 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input)
00585 {
00586
00587
00588
00589
00590
00591
00592 m_svm_arrays_clean=false ;
00593
00594 ASSERT(m_num_svms==mod_words_input.num_rows);
00595 ASSERT(mod_words_input.num_cols==2);
00596
00597 m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ;
00598 m_mod_words_array = m_mod_words.get_array() ;
00599
00600
00601
00602
00603
00604 }
00605
00606 bool CDynProg::check_svm_arrays()
00607 {
00608
00609 if ((m_word_degree.get_dim1()==m_num_degrees) &&
00610 (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00611 (m_num_words.get_dim1()==m_num_degrees) &&
00612
00613
00614
00615
00616
00617
00618 (m_num_unique_words.get_dim1()==m_num_degrees) &&
00619 (m_mod_words.get_dim1()==m_num_svms) &&
00620 (m_mod_words.get_dim2()==2) &&
00621 (m_sign_words.get_dim1()==m_num_svms) &&
00622 (m_string_words.get_dim1()==m_num_svms))
00623 {
00624 m_svm_arrays_clean=true ;
00625 return true ;
00626 }
00627 else
00628 {
00629 if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00630 (m_mod_words.get_dim1()==m_num_svms) &&
00631 (m_mod_words.get_dim2()==2) &&
00632 (m_sign_words.get_dim1()==m_num_svms) &&
00633 (m_string_words.get_dim1()==m_num_svms))
00634 SG_PRINT("OK\n") ;
00635 else
00636 SG_PRINT("not OK\n") ;
00637
00638 if (!(m_word_degree.get_dim1()==m_num_degrees))
00639 SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00640 if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00641 SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00642 if (!(m_num_words.get_dim1()==m_num_degrees))
00643 SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00644
00645
00646 if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00647 SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00648 if (!(m_mod_words.get_dim1()==m_num_svms))
00649 SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00650 if (!(m_mod_words.get_dim2()==2))
00651 SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00652 if (!(m_sign_words.get_dim1()==m_num_svms))
00653 SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00654 if (!(m_string_words.get_dim1()==m_num_svms))
00655 SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00656
00657 m_svm_arrays_clean=false ;
00658 return false ;
00659 }
00660 }
00661
00662 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq)
00663 {
00664 if (seq.num_dims!=3)
00665 SG_ERROR("Expected 3-dimensional Matrix\n");
00666
00667 int32_t N=seq.dims[0];
00668 int32_t cand_pos=seq.dims[1];
00669 int32_t max_num_features=seq.dims[2];
00670
00671 if (!m_svm_arrays_clean)
00672 {
00673 SG_ERROR( "SVM arrays not clean") ;
00674 return ;
00675 } ;
00676
00677 ASSERT(N==m_N);
00678 ASSERT(cand_pos==m_seq_len);
00679 ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00680 ASSERT(m_end_state_distribution_q.get_dim1()==N);
00681
00682 m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ;
00683 }
00684 int32_t CDynProg::get_num_positions()
00685 {
00686 return m_seq_len;
00687 }
00688
00689 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path)
00690 {
00691 ASSERT(seg_path.num_rows==2);
00692 ASSERT(seg_path.num_cols==m_seq_len);
00693
00694 if (seg_path.matrix!=NULL)
00695 {
00696 int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len);
00697 float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len);
00698 for (int32_t i=0; i<m_seq_len; i++)
00699 {
00700 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ;
00701 segment_mask[i] = seg_path.matrix[2*i+1] ;
00702 }
00703 best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00704 SG_FREE(segment_ids);
00705 SG_FREE(segment_mask);
00706 }
00707 else
00708 {
00709 int32_t *izeros = SG_MALLOC(int32_t, m_seq_len);
00710 float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len);
00711 for (int32_t i=0; i<m_seq_len; i++)
00712 {
00713 izeros[i]=0 ;
00714 dzeros[i]=0.0 ;
00715 }
00716 best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00717 SG_FREE(izeros);
00718 SG_FREE(dzeros);
00719 }
00720 }
00721
00722 void CDynProg::set_pos(SGVector<int32_t> pos)
00723 {
00724 m_pos.set_array(pos.vector, pos.vlen, true, true) ;
00725 m_seq_len = pos.vlen;
00726 }
00727
00728 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info)
00729 {
00730 if (orf_info.num_cols!=2)
00731 SG_ERROR( "orf_info size incorrect %i!=2\n", orf_info.num_cols) ;
00732
00733 m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ;
00734 m_orf_info.set_array_name("orf_info") ;
00735 }
00736
00737 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00738 {
00739 if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00740 SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00741
00742 SG_UNREF(m_seq_sparse1);
00743 SG_UNREF(m_seq_sparse2);
00744
00745 m_seq_sparse1=seq_sparse1;
00746 m_seq_sparse2=seq_sparse2;
00747 SG_REF(m_seq_sparse1);
00748 SG_REF(m_seq_sparse2);
00749 }
00750
00751 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00752 {
00753 SG_UNREF(m_plif_matrices);
00754
00755 m_plif_matrices=pm;
00756
00757 SG_REF(m_plif_matrices);
00758 }
00759
00760 void CDynProg::set_gene_string(SGVector<char> genestr)
00761 {
00762 ASSERT(genestr.vector);
00763 ASSERT(genestr.vlen>0);
00764
00765 m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ;
00766 }
00767
00768 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00769 {
00770 ASSERT(my_state_seq && m_seq_len>0);
00771 m_my_state_seq.resize_array(m_seq_len);
00772 for (int32_t i=0; i<m_seq_len; i++)
00773 m_my_state_seq[i]=my_state_seq[i];
00774 }
00775
00776 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00777 {
00778 ASSERT(my_pos_seq && m_seq_len>0);
00779 m_my_pos_seq.resize_array(m_seq_len);
00780 for (int32_t i=0; i<m_seq_len; i++)
00781 m_my_pos_seq[i]=my_pos_seq[i];
00782 }
00783
00784 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights)
00785 {
00786 if (m_num_svms!=dictionary_weights.num_cols)
00787 {
00788 SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n",
00789 m_num_svms, dictionary_weights.num_cols) ;
00790 }
00791
00792 m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ;
00793
00794
00795 m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00796 m_segment_loss.zero() ;
00797 m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00798 m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00799 m_segment_ids.zero() ;
00800 m_segment_mask.zero() ;
00801 }
00802
00803 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss)
00804 {
00805 int32_t m=segment_loss.num_rows;
00806 int32_t n=segment_loss.num_cols;
00807
00808 if (2*m!=n)
00809 SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00810
00811 if (m!=m_max_a_id+1)
00812 SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00813
00814 m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ;
00815
00816
00817
00818 }
00819
00820 void CDynProg::best_path_set_segment_ids_mask(
00821 int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00822 {
00823
00824 if (m!=m_observation_matrix.get_dim2())
00825 SG_ERROR("size of segment_ids or segment_mask (%i) does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2());
00826 int32_t max_id = 0;
00827 for (int32_t i=1;i<m;i++)
00828 max_id = CMath::max(max_id,segment_ids[i]);
00829
00830 m_segment_ids.set_array(segment_ids, m, true, true) ;
00831 m_segment_ids.set_array_name("m_segment_ids");
00832 m_segment_mask.set_array(segment_mask, m, true, true) ;
00833 m_segment_mask.set_array_name("m_segment_mask");
00834
00835 m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00836 m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00837 m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00838 }
00839
00840 SGVector<float64_t> CDynProg::get_scores()
00841 {
00842 SGVector<float64_t> scores(m_scores.get_dim1());
00843 memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1()));
00844
00845 return scores;
00846 }
00847
00848 SGMatrix<int32_t> CDynProg::get_states()
00849 {
00850 SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2());
00851
00852 int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() );
00853 memcpy(states.matrix ,m_states.get_array(),sz);
00854
00855 return states;
00856 }
00857
00858 SGMatrix<int32_t> CDynProg::get_positions()
00859 {
00860 SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2());
00861
00862 int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2());
00863 memcpy(positions.matrix, m_positions.get_array(),sz);
00864
00865 return positions;
00866 }
00867
00868 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00869 {
00870 ASSERT(scores && seq_len);
00871
00872 *seq_len=m_my_scores.get_dim1();
00873
00874 int32_t sz = sizeof(float64_t)*(*seq_len);
00875
00876 *scores = SG_MALLOC(float64_t, *seq_len);
00877 ASSERT(*scores);
00878
00879 memcpy(*scores,m_my_scores.get_array(),sz);
00880 }
00881
00882 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00883 {
00884 ASSERT(losses && seq_len);
00885
00886 *seq_len=m_my_losses.get_dim1();
00887
00888 int32_t sz = sizeof(float64_t)*(*seq_len);
00889
00890 *losses = SG_MALLOC(float64_t, *seq_len);
00891 ASSERT(*losses);
00892
00893 memcpy(*losses,m_my_losses.get_array(),sz);
00894 }
00895
00897
00898 bool CDynProg::extend_orf(
00899 int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
00900 int32_t to)
00901 {
00902 #ifdef DYNPROG_TIMING_DETAIL
00903 MyTime.start() ;
00904 #endif
00905
00906 if (start<0)
00907 start=0 ;
00908 if (to<0)
00909 to=0 ;
00910
00911 int32_t orf_target = orf_to-orf_from ;
00912 if (orf_target<0) orf_target+=3 ;
00913
00914 int32_t pos ;
00915 if (last_pos==to)
00916 pos = to-orf_to-3 ;
00917 else
00918 pos=last_pos ;
00919
00920 if (pos<0)
00921 {
00922 #ifdef DYNPROG_TIMING_DETAIL
00923 MyTime.stop() ;
00924 orf_time += MyTime.time_diff_sec() ;
00925 #endif
00926 return true ;
00927 }
00928
00929 for (; pos>=start; pos-=3)
00930 if (m_genestr_stop[pos])
00931 {
00932 #ifdef DYNPROG_TIMING_DETAIL
00933 MyTime.stop() ;
00934 orf_time += MyTime.time_diff_sec() ;
00935 #endif
00936 return false ;
00937 }
00938
00939
00940 last_pos = CMath::min(pos+3,to-orf_to-3) ;
00941
00942 #ifdef DYNPROG_TIMING_DETAIL
00943 MyTime.stop() ;
00944 orf_time += MyTime.time_diff_sec() ;
00945 #endif
00946 return true ;
00947 }
00948
00949 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
00950 int16_t nbest, bool with_loss, bool with_multiple_sequences)
00951 {
00952
00953
00954
00955
00956
00957
00958
00959
00960 const float64_t* seq_array = m_observation_matrix.get_array();
00961 m_scores.resize_array(nbest) ;
00962 m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00963 m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00964
00965 for (int32_t i=0; i<nbest; i++)
00966 {
00967 m_scores[i]=-1;
00968 for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
00969 {
00970 m_states.element(i,j)=-1;
00971 m_positions.element(i,j)=-1;
00972 }
00973 }
00974 float64_t* prob_nbest=m_scores.get_array();
00975 int32_t* my_state_seq=m_states.get_array();
00976 int32_t* my_pos_seq=m_positions.get_array();
00977
00978 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
00979 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
00980
00981
00982
00983 #ifdef DYNPROG_TIMING
00984 segment_init_time = 0.0 ;
00985 segment_pos_time = 0.0 ;
00986 segment_extend_time = 0.0 ;
00987 segment_clean_time = 0.0 ;
00988 orf_time = 0.0 ;
00989 svm_init_time = 0.0 ;
00990 svm_pos_time = 0.0 ;
00991 svm_clean_time = 0.0 ;
00992 inner_loop_time = 0.0 ;
00993 content_svm_values_time = 0.0 ;
00994 content_plifs_time = 0.0 ;
00995 inner_loop_max_time = 0.0 ;
00996 long_transition_time = 0.0 ;
00997
00998 MyTime2.start() ;
00999 #endif
01000
01001 if (!m_svm_arrays_clean)
01002 {
01003 SG_ERROR( "SVM arrays not clean") ;
01004 return ;
01005 }
01006
01007 #ifdef DYNPROG_DEBUG
01008 m_transition_matrix_a.set_array_name("transition_matrix");
01009 m_transition_matrix_a.display_array();
01010 m_mod_words.display_array() ;
01011 m_sign_words.display_array() ;
01012 m_string_words.display_array() ;
01013
01014 #endif
01015
01016 int32_t max_look_back = 1000 ;
01017 bool use_svm = false ;
01018
01019 SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01020
01021
01022
01023
01024 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
01025 PEN.set_array_name("PEN");
01026 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
01027 PEN_state_signals.set_array_name("state_signals");
01028
01029 CArray2<float64_t> seq(m_N, m_seq_len) ;
01030 seq.set_array_name("seq") ;
01031 seq.zero() ;
01032
01033 #ifdef DYNPROG_DEBUG
01034 SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01035 SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01036 SG_PRINT("m_num_svms: %i\n", m_num_svms);
01037 SG_PRINT("m_num_lin_feat_plifs_cum: ");
01038 for (int i=0; i<=m_num_raw_data; i++)
01039 SG_PRINT(" %i ",m_num_lin_feat_plifs_cum[i]);
01040 SG_PRINT("\n");
01041 #endif
01042
01043 float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01044 {
01045 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01046 svm_value[s]=0 ;
01047 }
01048
01049 {
01050
01051
01052
01053 CArray3<float64_t> *seq_input=NULL ;
01054 if (seq_array!=NULL)
01055 {
01056
01057
01058 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01059 seq_input->set_array_name("seq_input") ;
01060
01061
01062 ASSERT(m_seq_sparse1==NULL) ;
01063 ASSERT(m_seq_sparse2==NULL) ;
01064 } else
01065 {
01066 SG_PRINT("using sparse seq_array\n") ;
01067
01068 ASSERT(m_seq_sparse1!=NULL) ;
01069 ASSERT(m_seq_sparse2!=NULL) ;
01070 ASSERT(max_num_signals==2) ;
01071 }
01072
01073 for (int32_t i=0; i<m_N; i++)
01074 for (int32_t j=0; j<m_seq_len; j++)
01075 seq.element(i,j) = 0 ;
01076
01077 for (int32_t i=0; i<m_N; i++)
01078 for (int32_t j=0; j<m_seq_len; j++)
01079 for (int32_t k=0; k<max_num_signals; k++)
01080 {
01081 if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01082 {
01083
01084 if (seq_input!=NULL)
01085 seq.element(i,j) = seq_input->element(i,j,k) ;
01086 else
01087 {
01088 if (k==0)
01089 seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01090 if (k==1)
01091 seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01092 }
01093 break ;
01094 }
01095 if (PEN_state_signals.element(i,k)!=NULL)
01096 {
01097 if (seq_input!=NULL)
01098 {
01099
01100 if (CMath::is_finite(seq_input->element(i,j,k)))
01101 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01102 else
01103
01104 seq.element(i,j) = seq_input->element(i, j, k) ;
01105 }
01106 else
01107 {
01108 if (k==0)
01109 {
01110
01111 if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01112 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01113 else
01114
01115 seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01116 }
01117 if (k==1)
01118 {
01119
01120 if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01121 seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01122 else
01123
01124 seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01125 }
01126 }
01127 }
01128 else
01129 break ;
01130 }
01131 delete seq_input;
01132 SG_FREE(svm_value);
01133 }
01134
01135
01136 bool long_transitions = m_long_transitions ;
01137 CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ;
01138 long_transition_content_start_position.set_array_name("long_transition_content_start_position");
01139 #ifdef DYNPROG_DEBUG
01140 CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ;
01141 long_transition_content_end_position.set_array_name("long_transition_content_end_position");
01142 #endif
01143 CArray2<int32_t> long_transition_content_start(m_N,m_N) ;
01144 long_transition_content_start.set_array_name("long_transition_content_start");
01145 CArray2<float64_t> long_transition_content_scores(m_N,m_N) ;
01146 long_transition_content_scores.set_array_name("long_transition_content_scores");
01147 #ifdef DYNPROG_DEBUG
01148 CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ;
01149 long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen");
01150 CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ;
01151 long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev");
01152 CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ;
01153 long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem");
01154 #endif
01155 CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ;
01156 long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss");
01157
01158 if (nbest!=1)
01159 {
01160 SG_ERROR("Long transitions are not supported for nbest!=1") ;
01161 long_transitions = false ;
01162 }
01163 long_transition_content_scores.set_const(-CMath::INFTY);
01164 #ifdef DYNPROG_DEBUG
01165 long_transition_content_scores_pen.set_const(0) ;
01166 long_transition_content_scores_elem.set_const(0) ;
01167 long_transition_content_scores_prev.set_const(0) ;
01168 #endif
01169 if (with_loss)
01170 long_transition_content_scores_loss.set_const(0) ;
01171 long_transition_content_start.zero() ;
01172 long_transition_content_start_position.zero() ;
01173 #ifdef DYNPROG_DEBUG
01174 long_transition_content_end_position.zero() ;
01175 #endif
01176
01177 svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01178 {
01179 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01180 svm_value[s]=0 ;
01181 }
01182
01183 CArray2<int32_t> look_back(m_N,m_N) ;
01184 look_back.set_array_name("look_back");
01185
01186
01187
01188
01189 {
01190 for (int32_t i=0; i<m_N; i++)
01191 for (int32_t j=0; j<m_N; j++)
01192 {
01193 look_back.set_element(INT_MAX, i, j) ;
01194
01195 }
01196
01197 for (int32_t j=0; j<m_N; j++)
01198 {
01199
01200 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01201 const T_STATES *elem_list = trans_list_forward[j] ;
01202
01203 for (int32_t i=0; i<num_elem; i++)
01204 {
01205 T_STATES ii = elem_list[i] ;
01206
01207 CPlifBase *penij=PEN.element(j, ii) ;
01208 if (penij==NULL)
01209 {
01210 if (long_transitions)
01211 {
01212 look_back.set_element(m_long_transition_threshold, j, ii) ;
01213
01214 }
01215 continue ;
01216 }
01217
01218
01219 if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01220 {
01221 look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01222
01223 if (CMath::ceil(penij->get_max_value()) > max_look_back)
01224 {
01225 SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01226 max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01227 }
01228 }
01229 else
01230 {
01231 look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01232
01233 }
01234
01235 if (penij->uses_svm_values())
01236 use_svm=true ;
01237 }
01238 }
01239
01240 if (long_transitions)
01241 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01242
01243
01244 max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01245
01246 int32_t num_long_transitions = 0 ;
01247 for (int32_t i=0; i<m_N; i++)
01248 for (int32_t j=0; j<m_N; j++)
01249 {
01250 if (look_back.get_element(i,j)==m_long_transition_threshold)
01251 num_long_transitions++ ;
01252 if (look_back.get_element(i,j)==INT_MAX)
01253 {
01254 if (long_transitions)
01255 {
01256 look_back.set_element(m_long_transition_threshold, i, j) ;
01257
01258 }
01259 else
01260 {
01261 look_back.set_element(max_look_back, i, j) ;
01262
01263 }
01264 }
01265 }
01266 SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01267 }
01268
01269
01270
01271 SG_DEBUG("use_svm=%i\n", use_svm) ;
01272
01273 SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01274 const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01275 SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01276
01277
01278
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289
01290 ASSERT(nbest<32000) ;
01291
01292
01293
01294 CArray3<float64_t> delta(m_seq_len, m_N, nbest) ;
01295 delta.set_array_name("delta");
01296 float64_t* delta_array = delta.get_array() ;
01297
01298
01299 CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ;
01300 psi.set_array_name("psi");
01301
01302
01303 CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ;
01304 ktable.set_array_name("ktable");
01305
01306
01307 CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ;
01308 ptable.set_array_name("ptable");
01309
01310
01311 CArray<float64_t> delta_end(nbest) ;
01312 delta_end.set_array_name("delta_end");
01313
01314
01315 CArray<T_STATES> path_ends(nbest) ;
01316 path_ends.set_array_name("path_ends");
01317
01318
01319 CArray<int16_t> ktable_end(nbest) ;
01320 ktable_end.set_array_name("ktable_end");
01321
01322
01323 float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen);
01324 memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01325 int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
01326 memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01327
01328 CArray<float64_t> oldtempvv(look_back_buflen) ;
01329 oldtempvv.set_array_name("oldtempvv");
01330 CArray<float64_t> oldtempvv2(look_back_buflen) ;
01331 oldtempvv2.set_array_name("oldtempvv2");
01332
01333
01334
01335 CArray<int32_t> oldtempii(look_back_buflen) ;
01336 oldtempii.set_array_name("oldtempii");
01337 CArray<int32_t> oldtempii2(look_back_buflen) ;
01338 oldtempii2.set_array_name("oldtempii2");
01339
01340
01341 CArray<T_STATES> state_seq(m_seq_len) ;
01342 state_seq.set_array_name("state_seq");
01343
01344
01345 CArray<int32_t> pos_seq(m_seq_len) ;
01346 pos_seq.set_array_name("pos_seq");
01347
01348
01349
01350 m_dict_weights.set_array_name("dict_weights") ;
01351 m_word_degree.set_array_name("word_degree") ;
01352 m_cum_num_words.set_array_name("cum_num_words") ;
01353 m_num_words.set_array_name("num_words") ;
01354
01355
01356
01357 m_num_unique_words.set_array_name("num_unique_words") ;
01358
01359 PEN.set_array_name("PEN") ;
01360 seq.set_array_name("seq") ;
01361
01362 delta.set_array_name("delta") ;
01363 psi.set_array_name("psi") ;
01364 ktable.set_array_name("ktable") ;
01365 ptable.set_array_name("ptable") ;
01366 delta_end.set_array_name("delta_end") ;
01367 path_ends.set_array_name("path_ends") ;
01368 ktable_end.set_array_name("ktable_end") ;
01369
01370 #ifdef USE_TMP_ARRAYCLASS
01371 fixedtempvv.set_array_name("fixedtempvv") ;
01372 fixedtempii.set_array_name("fixedtempvv") ;
01373 #endif
01374
01375 oldtempvv.set_array_name("oldtempvv") ;
01376 oldtempvv2.set_array_name("oldtempvv2") ;
01377 oldtempii.set_array_name("oldtempii") ;
01378 oldtempii2.set_array_name("oldtempii2") ;
01379
01380
01382
01383 #ifdef DYNPROG_DEBUG
01384 state_seq.display_size() ;
01385 pos_seq.display_size() ;
01386
01387 m_dict_weights.display_size() ;
01388 m_word_degree.display_array() ;
01389 m_cum_num_words.display_array() ;
01390 m_num_words.display_array() ;
01391
01392
01393
01394 m_num_unique_words.display_array() ;
01395
01396 PEN.display_size() ;
01397 PEN_state_signals.display_size() ;
01398 seq.display_size() ;
01399 m_orf_info.display_size() ;
01400
01401
01402 delta.display_size() ;
01403 psi.display_size() ;
01404 ktable.display_size() ;
01405 ptable.display_size() ;
01406 delta_end.display_size() ;
01407 path_ends.display_size() ;
01408 ktable_end.display_size() ;
01409
01410 #ifdef USE_TMP_ARRAYCLASS
01411 fixedtempvv.display_size() ;
01412 fixedtempii.display_size() ;
01413 #endif
01414
01415
01416
01417
01418 state_seq.display_size() ;
01419 pos_seq.display_size() ;
01420
01421
01422
01423 #endif //DYNPROG_DEBUG
01424
01426
01427
01428
01429 {
01430 for (int32_t s=0; s<m_num_svms; s++)
01431 ASSERT(m_string_words_array[s]<1) ;
01432 }
01433
01434
01435
01436
01437
01438 {
01439
01440 for (T_STATES i=0; i<m_N; i++)
01441 {
01442
01443 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;
01444 psi.element(0,i,0) = 0 ;
01445 if (nbest>1)
01446 ktable.element(0,i,0) = 0 ;
01447 ptable.element(0,i,0) = 0 ;
01448 for (int16_t k=1; k<nbest; k++)
01449 {
01450 int32_t dim1, dim2, dim3 ;
01451 delta.get_array_size(dim1, dim2, dim3) ;
01452
01453
01454 delta.element(delta_array, 0, i, k, m_seq_len, m_N) = -CMath::INFTY ;
01455 psi.element(0,i,0) = 0 ;
01456 if (nbest>1)
01457 ktable.element(0,i,k) = 0 ;
01458 ptable.element(0,i,k) = 0 ;
01459 }
01460
01461
01462
01463
01464
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475
01476
01477
01478 }
01479 }
01480
01481 SG_DEBUG("START_RECURSION \n\n");
01482
01483
01484 for (int32_t t=1; t<m_seq_len; t++)
01485 {
01486
01487
01488
01489
01490 for (T_STATES j=0; j<m_N; j++)
01491 {
01492 if (seq.element(j,t)<=-1e20)
01493 {
01494 for (int16_t k=0; k<nbest; k++)
01495 {
01496 delta.element(delta_array, t, j, k, m_seq_len, m_N) = seq.element(j,t) ;
01497 psi.element(t,j,k) = 0 ;
01498 if (nbest>1)
01499 ktable.element(t,j,k) = 0 ;
01500 ptable.element(t,j,k) = 0 ;
01501 }
01502 }
01503 else
01504 {
01505 const T_STATES num_elem = trans_list_forward_cnt[j] ;
01506 const T_STATES *elem_list = trans_list_forward[j] ;
01507 const float64_t *elem_val = trans_list_forward_val[j] ;
01508 const int32_t *elem_id = trans_list_forward_id[j] ;
01509
01510 int32_t fixed_list_len = 0 ;
01511 float64_t fixedtempvv_ = CMath::INFTY ;
01512 int32_t fixedtempii_ = 0 ;
01513 bool fixedtemplong = false ;
01514
01515 for (int32_t i=0; i<num_elem; i++)
01516 {
01517 T_STATES ii = elem_list[i] ;
01518
01519 const CPlifBase * penalty = PEN.element(j,ii) ;
01520
01521
01522
01523
01524
01525
01526
01527
01528
01529
01530
01531
01532 int32_t look_back_ = look_back.element(j, ii) ;
01533
01534 int32_t orf_from = m_orf_info.element(ii,0) ;
01535 int32_t orf_to = m_orf_info.element(j,1) ;
01536 if((orf_from!=-1)!=(orf_to!=-1))
01537 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01538 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01539
01540 int32_t orf_target = -1 ;
01541 if (orf_from!=-1)
01542 {
01543 orf_target=orf_to-orf_from ;
01544 if (orf_target<0)
01545 orf_target+=3 ;
01546 ASSERT(orf_target>=0 && orf_target<3) ;
01547 }
01548
01549 int32_t orf_last_pos = m_pos[t] ;
01550 #ifdef DYNPROG_TIMING
01551 MyTime3.start() ;
01552 #endif
01553 int32_t num_ok_pos = 0 ;
01554
01555 for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01556 {
01557 bool ok ;
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567 if (orf_target==-1)
01568 ok=true ;
01569 else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01570 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01571 else
01572 ok=false ;
01573
01574 if (ok)
01575 {
01576
01577 float64_t segment_loss = 0.0 ;
01578 if (with_loss)
01579 {
01580 segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01581
01582
01583 }
01585
01587
01588 int32_t frame = orf_from;
01589 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01590
01591 float64_t pen_val = 0.0 ;
01592 if (penalty)
01593 {
01594 #ifdef DYNPROG_TIMING_DETAIL
01595 MyTime.start() ;
01596 #endif
01597 pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01598
01599 #ifdef DYNPROG_TIMING_DETAIL
01600 MyTime.stop() ;
01601 content_plifs_time += MyTime.time_diff_sec() ;
01602 #endif
01603 }
01604
01605 #ifdef DYNPROG_TIMING_DETAIL
01606 MyTime.start() ;
01607 #endif
01608 num_ok_pos++ ;
01609
01610 if (nbest==1)
01611 {
01612 float64_t val = elem_val[i] + pen_val ;
01613 if (with_loss)
01614 val += segment_loss ;
01615
01616 float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01617
01618 if (mval<fixedtempvv_)
01619 {
01620 fixedtempvv_ = mval ;
01621 fixedtempii_ = ii + ts*m_N;
01622 fixed_list_len = 1 ;
01623 fixedtemplong = false ;
01624 }
01625 }
01626 else
01627 {
01628 for (int16_t diff=0; diff<nbest; diff++)
01629 {
01630 float64_t val = elem_val[i] ;
01631 val += pen_val ;
01632 if (with_loss)
01633 val += segment_loss ;
01634
01635 float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01636
01637
01638
01639
01640
01641 if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01642 {
01643 if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01644 {
01645 fixedtempvv[fixed_list_len] = mval ;
01646 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01647 fixed_list_len++ ;
01648 }
01649 else
01650 {
01651 int32_t addhere = fixed_list_len;
01652 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01653 addhere--;
01654
01655
01656 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01657 {
01658 fixedtempvv[jj] = fixedtempvv[jj-1];
01659 fixedtempii[jj] = fixedtempii[jj-1];
01660 }
01661
01662 fixedtempvv[addhere] = mval;
01663 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01664
01665 if (fixed_list_len < nbest)
01666 fixed_list_len++;
01667 }
01668 }
01669 }
01670 }
01671 #ifdef DYNPROG_TIMING_DETAIL
01672 MyTime.stop() ;
01673 inner_loop_max_time += MyTime.time_diff_sec() ;
01674 #endif
01675 }
01676 }
01677 #ifdef DYNPROG_TIMING
01678 MyTime3.stop() ;
01679 inner_loop_time += MyTime3.time_diff_sec() ;
01680 #endif
01681 }
01682 for (int32_t i=0; i<num_elem; i++)
01683 {
01684 T_STATES ii = elem_list[i] ;
01685
01686 const CPlifBase * penalty = PEN.element(j,ii) ;
01687
01688
01689
01690
01691
01692
01693
01694
01695
01696
01697
01698
01699 int32_t look_back_ = look_back.element(j, ii) ;
01700
01701
01702 int32_t orf_from = m_orf_info.element(ii,0) ;
01703 int32_t orf_to = m_orf_info.element(j,1) ;
01704 if((orf_from!=-1)!=(orf_to!=-1))
01705 SG_DEBUG("j=%i ii=%i orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01706 ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01707
01708 int32_t orf_target = -1 ;
01709 if (orf_from!=-1)
01710 {
01711 orf_target=orf_to-orf_from ;
01712 if (orf_target<0)
01713 orf_target+=3 ;
01714 ASSERT(orf_target>=0 && orf_target<3) ;
01715 }
01716
01717
01718
01719
01720 #ifdef DYNPROG_TIMING
01721 MyTime3.start() ;
01722 #endif
01723
01724
01725
01726
01727
01728
01729
01730
01731 #ifdef DYNPROG_TIMING
01732 MyTime3.start() ;
01733 #endif
01734
01735 if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
01736 {
01737
01738
01739
01740 int32_t start = long_transition_content_start.get_element(ii, j) ;
01741 int32_t end_5p_part = start ;
01742 for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
01743 {
01744
01745 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
01746 end_5p_part++ ;
01747
01748 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ;
01749 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ;
01750
01751 float64_t pen_val = 0.0;
01752
01753 if (penalty)
01754 {
01755 int32_t frame = m_orf_info.element(ii,0);
01756 lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame);
01757 pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
01758 }
01759
01760
01761
01762
01763
01764
01765
01766 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
01767
01768
01769 float64_t segment_loss_part1=0.0 ;
01770 if (with_loss)
01771 {
01772
01773 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part , end_5p_part, elem_id[i]);
01774
01775 mval_trans -= segment_loss_part1 ;
01776 }
01777
01778
01779 if (0)
01780 {
01781
01782
01783
01784
01785 long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
01786 long_transition_content_start_position.set_element(0, ii, j) ;
01787 if (with_loss)
01788 long_transition_content_scores_loss.set_element(0.0, ii, j) ;
01789 #ifdef DYNPROG_DEBUG
01790 long_transition_content_scores_pen.set_element(0.0, ii, j) ;
01791 long_transition_content_scores_elem.set_element(0.0, ii, j) ;
01792 long_transition_content_scores_prev.set_element(0.0, ii, j) ;
01793 long_transition_content_end_position.set_element(0, ii, j) ;
01794 #endif
01795 }
01796 if (with_loss)
01797 {
01798 float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
01799 float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
01800 float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
01801 long_transition_content_scores.set_element(score, ii, j) ;
01802 long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
01803 #ifdef DYNPROG_DEBUG
01804 long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01805 #endif
01806
01807 }
01808 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
01809 {
01810
01811 long_transition_content_scores.set_element(-mval_trans, ii, j) ;
01812 long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
01813 if (with_loss)
01814 long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
01815 #ifdef DYNPROG_DEBUG
01816 long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
01817 long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
01818 long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
01819
01820
01821
01822 long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01823 #endif
01824 }
01825
01826
01827
01828
01829 long_transition_content_start.set_element(start_5p_part, ii, j) ;
01830 }
01831
01832
01833
01834
01835
01836
01837
01838 int ts = t;
01839 while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
01840 ts-- ;
01841
01842 if (ts>0)
01843 {
01844 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
01845
01846
01847
01848 float pen_val_3p = 0.0 ;
01849 if (penalty)
01850 {
01851 int32_t frame = orf_from ;
01852 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01853 pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01854 }
01855
01856 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
01857
01858 {
01859 #ifdef DYNPROG_DEBUG
01860 float64_t segment_loss_part2=0.0 ;
01861 float64_t segment_loss_part1=0.0 ;
01862 #endif
01863 float64_t segment_loss_total=0.0 ;
01864
01865 if (with_loss)
01866 {
01867
01868
01869 #ifdef DYNPROG_DEBUG
01870
01871 segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
01872
01873 segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
01874 #endif
01875 segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
01876 mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
01877 }
01878
01879 #ifdef DYNPROG_DEBUG
01880 if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
01881 {
01882 SG_PRINT("Part2: %i,%i,%i: val=%1.6f pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i, loss=%1.1f/%1.1f (%i,%i)\n",
01883 m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
01884 long_transition_content_scores.get_element(ii, j),
01885 long_transition_content_scores_pen.get_element(ii, j),
01886 long_transition_content_scores_prev.get_element(ii, j),
01887 long_transition_content_scores_elem.get_element(ii, j),
01888 long_transition_content_scores_loss.get_element(ii, j),
01889 m_pos[long_transition_content_start_position.get_element(ii,j)],
01890 m_pos[long_transition_content_end_position.get_element(ii,j)],
01891 m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
01892 SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
01893 }
01894
01895 if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
01896 {
01897 SG_ERROR("LOSS: total=%1.1f (%i-%i) part1=%1.1f/%1.1f (%i-%i) part2=%1.1f (%i-%i) sum=%1.1f diff=%1.1f\n",
01898 segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
01899 long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
01900 segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
01901 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
01902 segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
01903 }
01904 #endif
01905 }
01906
01907
01908
01909
01910
01911
01912 if (mval < fixedtempvv_)
01913 {
01914
01915 int32_t fromtjk = fixedtempii_ ;
01916
01917
01918
01919
01920
01921 ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
01922
01923 fixedtempvv_ = mval ;
01924 fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
01925 fixed_list_len = 1 ;
01926 fixedtemplong = true ;
01927 }
01928 }
01929 }
01930 }
01931 #ifdef DYNPROG_TIMING
01932 MyTime3.stop() ;
01933 long_transition_time += MyTime3.time_diff_sec() ;
01934 #endif
01935
01936
01937 int32_t numEnt = fixed_list_len;
01938
01939 float64_t minusscore;
01940 int64_t fromtjk;
01941
01942 for (int16_t k=0; k<nbest; k++)
01943 {
01944 if (k<numEnt)
01945 {
01946 if (nbest==1)
01947 {
01948 minusscore = fixedtempvv_ ;
01949 fromtjk = fixedtempii_ ;
01950 }
01951 else
01952 {
01953 minusscore = fixedtempvv[k];
01954 fromtjk = fixedtempii[k];
01955 }
01956
01957 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -minusscore + seq.element(j,t);
01958 psi.element(t,j,k) = (fromtjk%m_N) ;
01959 if (nbest>1)
01960 ktable.element(t,j,k) = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
01961 ptable.element(t,j,k) = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
01962 }
01963 else
01964 {
01965 delta.element(delta_array, t, j, k, m_seq_len, m_N) = -CMath::INFTY ;
01966 psi.element(t,j,k) = 0 ;
01967 if (nbest>1)
01968 ktable.element(t,j,k) = 0 ;
01969 ptable.element(t,j,k) = 0 ;
01970 }
01971 }
01972 }
01973 }
01974 }
01975 {
01976 int32_t list_len = 0 ;
01977 for (int16_t diff=0; diff<nbest; diff++)
01978 {
01979 for (T_STATES i=0; i<m_N; i++)
01980 {
01981 oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
01982 oldtempii[list_len] = i + diff*m_N ;
01983 list_len++ ;
01984 }
01985 }
01986
01987 CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
01988
01989 for (int16_t k=0; k<nbest; k++)
01990 {
01991 delta_end.element(k) = -oldtempvv[k] ;
01992 path_ends.element(k) = (oldtempii[k]%m_N) ;
01993 if (nbest>1)
01994 ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
01995 }
01996
01997
01998 }
01999
02000 {
02001 for (int16_t k=0; k<nbest; k++)
02002 {
02003 prob_nbest[k]= delta_end.element(k) ;
02004
02005 int32_t i = 0 ;
02006 state_seq[i] = path_ends.element(k) ;
02007 int16_t q = 0 ;
02008 if (nbest>1)
02009 q=ktable_end.element(k) ;
02010 pos_seq[i] = m_seq_len-1 ;
02011
02012 while (pos_seq[i]>0)
02013 {
02014 ASSERT(i+1<m_seq_len);
02015
02016 state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02017 pos_seq[i+1] = ptable.element(pos_seq[i], state_seq[i], q) ;
02018 if (nbest>1)
02019 q = ktable.element(pos_seq[i], state_seq[i], q) ;
02020 i++ ;
02021 }
02022
02023 int32_t num_states = i+1 ;
02024 for (i=0; i<num_states;i++)
02025 {
02026 my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02027 my_pos_seq[i+k*m_seq_len] = pos_seq[num_states-i-1] ;
02028 }
02029 if (num_states<m_seq_len)
02030 {
02031 my_state_seq[num_states+k*m_seq_len]=-1 ;
02032 my_pos_seq[num_states+k*m_seq_len]=-1 ;
02033 }
02034 }
02035 }
02036
02037
02038
02039
02040
02041 #ifdef DYNPROG_TIMING
02042 MyTime2.stop() ;
02043
02044
02045 SG_PRINT("Timing: orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s svm_pos=%1.2f svm_clean=%1.2f\n content_svm_values_time=%1.2f content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02046 #endif
02047
02048 SG_FREE(fixedtempvv);
02049 SG_FREE(fixedtempii);
02050 }
02051
02052
02053 void CDynProg::best_path_trans_deriv(
02054 int32_t *my_state_seq, int32_t *my_pos_seq,
02055 int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02056 {
02057 m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02058 m_end_state_distribution_q_deriv.resize_array(m_N) ;
02059 m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02060
02061
02062 m_my_scores.resize_array(my_seq_len);
02063 m_my_losses.resize_array(my_seq_len);
02064 float64_t* my_scores=m_my_scores.get_array();
02065 float64_t* my_losses=m_my_losses.get_array();
02066 CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02067 CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02068
02069 if (!m_svm_arrays_clean)
02070 {
02071 SG_ERROR( "SVM arrays not clean") ;
02072 return ;
02073 } ;
02074
02075
02076
02077
02078
02079 bool use_svm = false ;
02080
02081 CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
02082 PEN.set_array_name("PEN");
02083 CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
02084 PEN_state_signals.set_array_name("PEN_state_signals");
02085 CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02086 seq_input.set_array_name("seq_input");
02087
02088 {
02089 for (int32_t i=0; i<m_N; i++)
02090 for (int32_t j=0; j<m_N; j++)
02091 {
02092 CPlifBase *penij=PEN.element(i,j) ;
02093 if (penij==NULL)
02094 continue ;
02095
02096 if (penij->uses_svm_values())
02097 use_svm=true ;
02098 penij->penalty_clear_derivative() ;
02099 }
02100 for (int32_t i=0; i<m_N; i++)
02101 for (int32_t j=0; j<max_num_signals; j++)
02102 {
02103 CPlifBase *penij=PEN_state_signals.element(i,j) ;
02104 if (penij==NULL)
02105 continue ;
02106 if (penij->uses_svm_values())
02107 use_svm=true ;
02108 penij->penalty_clear_derivative() ;
02109 }
02110 }
02111
02112 {
02113
02114 for (int32_t i=0; i<m_N; i++)
02115 {
02116 m_initial_state_distribution_p_deriv.element(i)=0 ;
02117 m_end_state_distribution_q_deriv.element(i)=0 ;
02118 for (int32_t j=0; j<m_N; j++)
02119 m_transition_matrix_a_deriv.element(i,j)=0 ;
02120 }
02121 }
02122
02123 {
02124 for (int32_t i=0; i<my_seq_len; i++)
02125 {
02126 my_scores[i]=0.0 ;
02127 my_losses[i]=0.0 ;
02128 }
02129 }
02130
02131
02132
02133
02134
02135
02136
02137 float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02138 float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02139 float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02140 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02141 {
02142 svm_value[s]=0 ;
02143 svm_value_part1[s]=0 ;
02144 svm_value_part2[s]=0 ;
02145 }
02146
02147
02148 float64_t total_score = 0.0 ;
02149 float64_t total_loss = 0.0 ;
02150
02151
02152 ASSERT(my_state_seq[0]>=0) ;
02153 m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02154 my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02155
02156 ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02157 m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02158 my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02159
02160
02161 total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02162
02163
02164 SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02165 for (int32_t i=0; i<my_seq_len-1; i++)
02166 {
02167 if (my_state_seq[i+1]==-1)
02168 break ;
02169 int32_t from_state = my_state_seq[i] ;
02170 int32_t to_state = my_state_seq[i+1] ;
02171 int32_t from_pos = my_pos_seq[i] ;
02172 int32_t to_pos = my_pos_seq[i+1] ;
02173
02174 int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02175 my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02176
02177 #ifdef DYNPROG_DEBUG
02178
02179
02180 if (i>0)
02181 {
02182 float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02183 float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02184 float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02185 SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02186 if (CMath::abs(loss1+loss2-loss3)>0)
02187 {
02188 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02189 }
02190 }
02191 io->set_loglevel(M_DEBUG) ;
02192 SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02193 #endif
02194
02195 m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02196 my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02197
02198 #ifdef DYNPROG_DEBUG
02199 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02200 #endif
02201
02202
02203
02204
02205
02206 bool is_long_transition = false ;
02207 if (m_long_transitions)
02208 {
02209 if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02210 is_long_transition = true ;
02211 if (m_orf_info.element(from_state,0)!=-1)
02212 is_long_transition = false ;
02213 }
02214
02215 int32_t from_pos_thresh = from_pos ;
02216 int32_t to_pos_thresh = to_pos ;
02217
02218 if (use_svm)
02219 {
02220 if (is_long_transition)
02221 {
02222
02223 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold)
02224 from_pos_thresh++ ;
02225 ASSERT(from_pos_thresh<to_pos) ;
02226 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold);
02227 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);
02228
02229 int32_t frame = m_orf_info.element(from_state,0);
02230 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02231
02232 #ifdef DYNPROG_DEBUG
02233 SG_PRINT("part1: pos1: %i pos2: %i pos3: %i \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02234 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02235 SG_PRINT("%1.4f ", svm_value_part1[s]);
02236 SG_PRINT("\n");
02237 #endif
02238
02239 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold)
02240 to_pos_thresh-- ;
02241 ASSERT(to_pos_thresh>0) ;
02242 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ;
02243 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ;
02244
02245 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02246
02247 #ifdef DYNPROG_DEBUG
02248 SG_PRINT("part2: pos1: %i pos2: %i pos3: %i \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02249 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02250 SG_PRINT("%1.4f ", svm_value_part2[s]);
02251 SG_PRINT("\n");
02252 #endif
02253 }
02254 else
02255 {
02256
02257
02258
02259 int32_t frame = m_orf_info.element(from_state,0);
02260 if (false)
02261 {
02262 int32_t num_current_svms=0;
02263 int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02264 SG_PRINT("penalties(%i, %i), frame:%i ", from_state, to_state, frame);
02265 PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
02266 SG_PRINT("\n");
02267 }
02268
02269 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02270 #ifdef DYNPROG_DEBUG
02271 SG_PRINT("part2: pos1: %i pos2: %i \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02272 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02273 SG_PRINT("%1.4f ", svm_value[s]);
02274 SG_PRINT("\n");
02275 #endif
02276 }
02277 }
02278
02279 if (PEN.element(to_state, from_state)!=NULL)
02280 {
02281 float64_t nscore = 0 ;
02282 if (is_long_transition)
02283 {
02284 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02285 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02286 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02287 }
02288 else
02289 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02290
02291 if (false)
02292 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
02293 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
02294
02295 my_scores[i] += nscore ;
02296
02297 for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)
02298 {
02299 svm_value[s]=-CMath::INFTY;
02300 svm_value_part1[s]=-CMath::INFTY;
02301 svm_value_part2[s]=-CMath::INFTY;
02302 }
02303
02304 #ifdef DYNPROG_DEBUG
02305
02306 #endif
02307 if (is_long_transition)
02308 {
02309 #ifdef DYNPROG_DEBUG
02310 float64_t sum_score = 0.0 ;
02311
02312 for (int kk=0; kk<i; kk++)
02313 sum_score += my_scores[i] ;
02314
02315 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i) 2: %1.6f (%i-%i) \n",
02316 is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
02317 nscore, sum_score,
02318 PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
02319 PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02320 #endif
02321 }
02322
02323 if (is_long_transition)
02324 {
02325 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02326 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02327 }
02328 else
02329 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02330
02331
02332
02333
02334
02335
02336
02337 if (is_long_transition)
02338 {
02339 for (int32_t d=1; d<=m_num_raw_data; d++)
02340 {
02341 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02342 svm_value[s]=-CMath::INFTY;
02343 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02344 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02345 for (int32_t k=0;k<num_intensities;k++)
02346 {
02347 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02348 svm_value[j]=intensities[k];
02349
02350 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02351
02352 }
02353 num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02354 for (int32_t k=0;k<num_intensities;k++)
02355 {
02356 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02357 svm_value[j]=intensities[k];
02358
02359 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02360
02361 }
02362 SG_FREE(intensities);
02363
02364 }
02365 }
02366 else
02367 {
02368 for (int32_t d=1; d<=m_num_raw_data; d++)
02369 {
02370 for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02371 svm_value[s]=-CMath::INFTY;
02372 float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02373 int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02374
02375 for (int32_t k=0;k<num_intensities;k++)
02376 {
02377 for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02378 svm_value[j]=intensities[k];
02379
02380 PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
02381
02382 }
02383 SG_FREE(intensities);
02384 }
02385 }
02386
02387 }
02388 #ifdef DYNPROG_DEBUG
02389 SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02390 #endif
02391
02392
02393 for (int32_t k=0; k<max_num_signals; k++)
02394 {
02395 if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02396 {
02397 #ifdef DYNPROG_DEBUG
02398 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02399 #endif
02400 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02401
02402
02403 break ;
02404 }
02405 if (PEN_state_signals.element(to_state, k)!=NULL)
02406 {
02407 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ;
02408 my_scores[i] += nscore ;
02409 #ifdef DYNPROG_DEBUG
02410 if (false)
02411 {
02412 SG_PRINT("is_long_transition=%i (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
02413 is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
02414 for (int x=0; x<23; x++)
02415 {
02416 for (int i=-10; i<10; i++)
02417 SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k));
02418 SG_PRINT("\n");
02419 }
02420
02421 }
02422 #endif
02423
02424
02425
02426
02427
02428
02429
02430
02431 #ifdef DYNPROG_DEBUG
02432 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02433 #endif
02434 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ;
02435 } else
02436 break ;
02437 }
02438
02439
02440
02441
02442 total_score += my_scores[i] ;
02443 total_loss += my_losses[i] ;
02444
02445 }
02446
02447
02448
02449
02450 SG_FREE(svm_value);
02451 SG_FREE(svm_value_part1);
02452 SG_FREE(svm_value_part2);
02453 }
02454
02455 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02456 {
02457 ASSERT(from_pos<to_pos);
02458 int32_t num_intensities = 0;
02459 int32_t* p_tiling_pos = &m_probe_pos[m_num_probes_cum[type-1]];
02460 float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02461 int32_t last_pos;
02462 int32_t num = m_num_probes_cum[type-1];
02463 while (*p_tiling_pos<to_pos)
02464 {
02465 if (*p_tiling_pos>=from_pos)
02466 {
02467 intensities[num_intensities] = *p_tiling_data;
02468 num_intensities++;
02469 }
02470 num++;
02471 if (num>=m_num_probes_cum[type])
02472 break;
02473 last_pos = *p_tiling_pos;
02474 p_tiling_pos++;
02475 p_tiling_data++;
02476 ASSERT(last_pos<*p_tiling_pos);
02477 }
02478 return num_intensities;
02479 }
02480
02481 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02482 {
02483 #ifdef DYNPROG_TIMING_DETAIL
02484 MyTime.start() ;
02485 #endif
02486
02487
02488
02489 for (int32_t i=0;i<m_num_svms;i++)
02490 {
02491 float64_t to_val = m_lin_feat.get_element(i, to_state);
02492 float64_t from_val = m_lin_feat.get_element(i, from_state);
02493 svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02494 }
02495 for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02496 {
02497 float64_t to_val = m_lin_feat.get_element(i, to_state);
02498 float64_t from_val = m_lin_feat.get_element(i, from_state);
02499 svm_values[i] = to_val-from_val ;
02500 }
02501 if (m_intron_list)
02502 {
02503 int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs);
02504 m_intron_list->get_intron_support(support, from_state, to_state);
02505 int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02506 int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
02507 int32_t cnt = 0;
02508 for (int32_t i=intron_list_start; i<intron_list_end;i++)
02509 {
02510 svm_values[i] = (float64_t) (support[cnt]);
02511 cnt++;
02512 }
02513
02514
02515 SG_FREE(support);
02516 }
02517
02518 if (frame!=-1)
02519 {
02520 svm_values[frame_plifs[0]] = 1e10;
02521 svm_values[frame_plifs[1]] = 1e10;
02522 svm_values[frame_plifs[2]] = 1e10;
02523 int32_t global_frame = from_pos%3;
02524 int32_t row = ((global_frame+frame)%3)+4;
02525 float64_t to_val = m_lin_feat.get_element(row, to_state);
02526 float64_t from_val = m_lin_feat.get_element(row, from_state);
02527 svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02528 }
02529 #ifdef DYNPROG_TIMING_DETAIL
02530 MyTime.stop() ;
02531 content_svm_values_time += MyTime.time_diff_sec() ;
02532 #endif
02533 }
02534 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02535 {
02536 m_intron_list = intron_list;
02537 m_num_intron_plifs = num_plifs;
02538 }
02539