DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Written (W) 2008-2009 Jonas Behr
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include "structure/DynProg.h"
00014 #include "lib/Mathematics.h"
00015 #include "lib/io.h"
00016 #include "lib/config.h"
00017 #include "features/StringFeatures.h"
00018 #include "features/Alphabet.h"
00019 #include "structure/Plif.h"
00020 #include "structure/IntronList.h"
00021 #include "lib/Array.h"
00022 #include "lib/Array2.h"
00023 #include "lib/Array3.h"
00024 
00025 #include <stdlib.h>
00026 #include <stdio.h>
00027 #include <time.h>
00028 #include <ctype.h>
00029 
00030 using namespace shogun;
00031 
00032 //#define USE_TMP_ARRAYCLASS
00033 //#define DYNPROG_DEBUG
00034 
00035 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00036 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00037 int32_t CDynProg::frame_plifs[3]={4,5,6};
00038 int32_t CDynProg::num_words_default[4]=   {64,256,1024,4096} ;
00039 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00040                                     1,1,1,1,1,1,1,1,
00041                                     0,0,0,0,0,0,0,0,
00042                                     0,0,0,0,0,0,0,0} ;  
00043 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00044                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00045 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00046                                        1,1,1,1,1,1,1,1} ; // which string should be used
00047 
00048 CDynProg::CDynProg(int32_t num_svms /*= 8 */)
00049 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00050     m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00051     m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00052     m_end_state_distribution_q_deriv(1),
00053 
00054       // multi svm
00055       m_num_degrees(4), 
00056       m_num_svms(num_svms), 
00057       m_word_degree(word_degree_default, m_num_degrees, true, true),
00058       m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00059       m_cum_num_words_array(m_cum_num_words.get_array()),
00060       m_num_words(num_words_default, m_num_degrees, true, true),
00061       m_num_words_array(m_num_words.get_array()),
00062       m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00063       m_mod_words_array(m_mod_words.get_array()),
00064       m_sign_words(sign_words_default, m_num_svms, true, true),
00065       m_sign_words_array(m_sign_words.get_array()),
00066       m_string_words(string_words_default, m_num_svms, true, true),
00067       m_string_words_array(m_string_words.get_array()),
00068       //m_svm_pos_start(m_num_degrees),
00069       m_num_unique_words(m_num_degrees),
00070       m_svm_arrays_clean(true),
00071 
00072       m_max_a_id(0), m_observation_matrix(1,1,1), 
00073       m_pos(1), 
00074       m_seq_len(0),
00075       m_orf_info(1,2), 
00076       m_plif_list(1), 
00077       m_PEN(1,1),
00078       m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2), 
00079       m_segment_ids(1),
00080       m_segment_mask(1),
00081       m_my_state_seq(1),
00082       m_my_pos_seq(1),
00083       m_my_scores(1),
00084       m_my_losses(1),
00085       m_scores(1),
00086       m_states(1,1),
00087       m_positions(1,1),
00088 
00089       m_seq_sparse1(NULL),
00090       m_seq_sparse2(NULL),
00091       m_plif_matrices(NULL),
00092       
00093       m_genestr_stop(1),
00094       m_intron_list(NULL),
00095       m_num_intron_plifs(0),
00096       m_lin_feat(1,1), //by Jonas
00097       m_raw_intensities(NULL),
00098       m_probe_pos(NULL),
00099       m_num_probes_cum(NULL),
00100       m_num_lin_feat_plifs_cum(NULL),
00101       m_num_raw_data(0),
00102       
00103       m_long_transitions(true),
00104       m_long_transition_threshold(1000)
00105 {
00106     trans_list_forward = NULL ;
00107     trans_list_forward_cnt = NULL ;
00108     trans_list_forward_val = NULL ;
00109     trans_list_forward_id = NULL ;
00110     trans_list_len = 0 ;
00111 
00112     mem_initialized = true ;
00113 
00114     m_N=1;
00115 
00116     m_raw_intensities = NULL;
00117     m_probe_pos = NULL;
00118     m_num_probes_cum = new int32_t[100];
00119     m_num_probes_cum[0] = 0;
00120     //m_use_tiling=false;
00121     m_num_lin_feat_plifs_cum = new int32_t[100];
00122     m_num_lin_feat_plifs_cum[0] = m_num_svms;
00123     m_num_raw_data = 0;
00124 #ifdef ARRAY_STATISTICS
00125     m_word_degree.set_name("word_degree");
00126 #endif
00127 
00128     m_transition_matrix_a_id.set_name("transition_matrix_a_id");
00129     m_transition_matrix_a.set_name("transition_matrix_a");
00130     m_transition_matrix_a_deriv.set_name("transition_matrix_a_deriv");
00131     m_mod_words.set_name("mod_words");
00132     m_orf_info.set_name("orf_info");
00133     m_segment_sum_weights.set_name("segment_sum_weights");
00134     m_PEN.set_name("PEN");
00135     m_PEN_state_signals.set_name("PEN_state_signals");
00136     m_dict_weights.set_name("dict_weights");
00137     m_states.set_name("states");
00138     m_positions.set_name("positions");
00139     m_lin_feat.set_name("lin_feat");
00140 
00141 
00142     m_observation_matrix.set_name("m_observation_matrix");
00143     m_segment_loss.set_name("m_segment_loss");
00144     m_seg_loss_obj = new CSegmentLoss();
00145 }
00146 
00147 CDynProg::~CDynProg()
00148 {
00149     if (trans_list_forward_cnt)
00150         delete[] trans_list_forward_cnt;
00151     if (trans_list_forward)
00152     {
00153         for (int32_t i=0; i<trans_list_len; i++)
00154         {
00155             if (trans_list_forward[i])
00156                 delete[] trans_list_forward[i];
00157         }
00158         delete[] trans_list_forward;
00159     }
00160     if (trans_list_forward_val)
00161     {
00162         for (int32_t i=0; i<trans_list_len; i++)
00163         {
00164             if (trans_list_forward_val[i])
00165                 delete[] trans_list_forward_val[i];
00166         }
00167         delete[] trans_list_forward_val;
00168     }
00169     if (trans_list_forward_id)
00170     {
00171         for (int32_t i=0; i<trans_list_len; i++)
00172         {
00173             if (trans_list_forward_id[i])
00174                 delete[] trans_list_forward_id[i];
00175         }
00176         delete[] trans_list_forward_id;
00177     }
00178     if (m_raw_intensities)
00179         delete[] m_raw_intensities;
00180     if (m_probe_pos)
00181         delete[] m_probe_pos;
00182     if (m_num_probes_cum)
00183       delete[] m_num_probes_cum ;
00184     if (m_num_lin_feat_plifs_cum)
00185       delete[] m_num_lin_feat_plifs_cum ;
00186 
00187     delete m_intron_list;
00188 
00189     SG_UNREF(m_seq_sparse1);
00190     SG_UNREF(m_seq_sparse2);
00191     SG_UNREF(m_plif_matrices);
00192 }
00193 
00195 int32_t CDynProg::get_num_svms()
00196 {
00197     return m_num_svms;
00198 }
00199 
00200 void CDynProg::precompute_stop_codons()
00201 {
00202     int32_t length=m_genestr.get_dim1();
00203 
00204     m_genestr_stop.resize_array(length) ;
00205     m_genestr_stop.zero() ;
00206     m_genestr_stop.set_name("genestr_stop") ;
00207     {
00208         for (int32_t i=0; i<length-2; i++)
00209             if ((m_genestr[i]=='t' || m_genestr[i]=='T') && 
00210                     (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') && 
00211                       (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00212                      ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00213             {
00214                 m_genestr_stop.element(i)=true ;
00215             }
00216             else
00217                 m_genestr_stop.element(i)=false ;
00218         m_genestr_stop.element(length-2)=false ;
00219         m_genestr_stop.element(length-1)=false ;
00220     }
00221 }
00222 
00223 void CDynProg::set_num_states(int32_t p_N)
00224 {
00225     m_N=p_N ;
00226 
00227     m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00228     m_transition_matrix_a.resize_array(m_N,m_N) ;
00229     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00230     m_initial_state_distribution_p.resize_array(m_N) ;
00231     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00232     m_end_state_distribution_q.resize_array(m_N);
00233     m_end_state_distribution_q_deriv.resize_array(m_N) ;
00234 
00235     m_orf_info.resize_array(m_N,2) ;
00236     m_PEN.resize_array(m_N,m_N) ;
00237 }
00238 
00239 int32_t CDynProg::get_num_states()
00240 {
00241     return m_N;
00242 }
00243 
00244 void CDynProg::init_tiling_data(
00245     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00246 {
00247     m_num_raw_data++;
00248     m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00249 
00250     int32_t* tmp_probe_pos = new int32_t[m_num_probes_cum[m_num_raw_data]];
00251     float64_t* tmp_raw_intensities = new float64_t[m_num_probes_cum[m_num_raw_data]];
00252 
00253 
00254     if (m_num_raw_data==1){
00255         memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00256         memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00257         //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2);  
00258     }else{
00259         memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t)); 
00260         memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));   
00261         memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));    
00262         memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));  
00263     }
00264     delete[] m_probe_pos;
00265     delete[] m_raw_intensities;
00266     m_probe_pos = tmp_probe_pos; //new int32_t[num_probes];
00267     m_raw_intensities = tmp_raw_intensities;//new float64_t[num_probes];
00268 
00269     //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00270     //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00271 
00272 }
00273 
00274 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00275 {
00276     m_lin_feat.resize_array(p_num_svms, m_seq_len);
00277 
00278     // initialize array
00279     for (int s=0; s<p_num_svms; s++)
00280       for (int p=0; p<m_seq_len; p++)
00281         m_lin_feat.set_element(0.0, s, p) ;
00282 }
00283 
00284 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00285 {
00286     int32_t dim1, dim2;
00287     m_lin_feat.get_array_size(dim1, dim2);
00288     ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00289     ASSERT(dim2==m_seq_len); // == number of candidate positions
00290 
00291 
00292 
00293     float64_t* arr = m_lin_feat.get_array();
00294     float64_t* tmp = new float64_t[(dim1+num_new_feat)*dim2];   
00295     memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00296     for(int32_t j=0;j<m_seq_len;j++)
00297                 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00298             tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00299 
00300     m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later 
00301     delete[] tmp;
00302 
00303     /*for(int32_t j=0;j<5;j++)
00304     {
00305         for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
00306         {
00307             SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j));
00308         }
00309         SG_PRINT("\n");
00310     }
00311     m_lin_feat.get_array_size(dim1,dim2);
00312     SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/
00313 
00314     //SG_PRINT("resize_lin_feat: done\n");
00315 }
00316 
00317 void CDynProg::precompute_tiling_plifs(
00318     CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00319 {
00320     m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00321     float64_t* tiling_plif = new float64_t[num_tiling_plifs];
00322     float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
00323     for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00324         svm_value[i]=0.0;
00325     int32_t* tiling_rows = new int32_t[num_tiling_plifs];
00326     for (int32_t i=0; i<num_tiling_plifs; i++)
00327     {
00328         tiling_plif[i]=0.0;
00329         CPlif * plif = PEN[tiling_plif_ids[i]];
00330         tiling_rows[i] = plif->get_use_svm();
00331 
00332         ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00333     }
00334     resize_lin_feat(num_tiling_plifs);
00335 
00336 
00337     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00338     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00339     int32_t num=m_num_probes_cum[m_num_raw_data-1];
00340 
00341     for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00342     {
00343         while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00344         {
00345             for (int32_t i=0; i<num_tiling_plifs; i++)
00346             {
00347                 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00348                 CPlif * plif = PEN[tiling_plif_ids[i]];
00349                 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00350                 plif->set_do_calc(true);
00351                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00352                 plif->set_do_calc(false);
00353             }
00354             p_tiling_data++;
00355             p_tiling_pos++;
00356             num++;
00357         }
00358         for (int32_t i=0; i<num_tiling_plifs; i++)
00359             m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00360     }
00361     delete[] svm_value;
00362     delete[] tiling_plif;
00363     delete[] tiling_rows;
00364 }
00365 
00366 void CDynProg::create_word_string()
00367 {
00368     delete[] m_wordstr;
00369     m_wordstr=new uint16_t**[5440];
00370     int32_t k=0;
00371     int32_t genestr_len=m_genestr.get_dim1();
00372 
00373     m_wordstr[k]=new uint16_t*[m_num_degrees] ;
00374     for (int32_t j=0; j<m_num_degrees; j++)
00375     {
00376         m_wordstr[k][j]=NULL ;
00377         {
00378             m_wordstr[k][j]=new uint16_t[genestr_len] ;
00379             for (int32_t i=0; i<genestr_len; i++)
00380                 switch (m_genestr[i])
00381                 {
00382                     case 'A':
00383                     case 'a': m_wordstr[k][j][i]=0 ; break ;
00384                     case 'C':
00385                     case 'c': m_wordstr[k][j][i]=1 ; break ;
00386                     case 'G':
00387                     case 'g': m_wordstr[k][j][i]=2 ; break ;
00388                     case 'T':
00389                     case 't': m_wordstr[k][j][i]=3 ; break ;
00390                     default: ASSERT(0) ;
00391                 }
00392             CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00393         }
00394     }
00395 }
00396 
00397 void CDynProg::precompute_content_values()
00398 {
00399     for (int32_t s=0; s<m_num_svms; s++)
00400       m_lin_feat.set_element(0.0, s, 0);
00401 
00402     for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00403     {
00404         int32_t from_pos = m_pos[p];
00405         int32_t to_pos = m_pos[p+1];
00406         float64_t* my_svm_values_unnormalized = new float64_t[m_num_svms];
00407         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00408         
00409         ASSERT(from_pos<=m_genestr.get_dim1());
00410         ASSERT(to_pos<=m_genestr.get_dim1());
00411         
00412         for (int32_t s=0; s<m_num_svms; s++)
00413             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00414 
00415         for (int32_t i=from_pos; i<to_pos; i++)
00416         {
00417             for (int32_t j=0; j<m_num_degrees; j++)
00418             {
00419                 uint16_t word = m_wordstr[0][j][i] ;
00420                 for (int32_t s=0; s<m_num_svms; s++)
00421                 {
00422                     // check if this k-mer should be considered for this SVM
00423                     if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00424                         continue;
00425                     my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00426                 }
00427             }
00428         }
00429         for (int32_t s=0; s<m_num_svms; s++)
00430         {
00431             float64_t prev = m_lin_feat.get_element(s, p);
00432             //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ;
00433             if (prev<-1e20 || prev>1e20)
00434             {
00435                 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00436                 prev=0 ;
00437             }
00438             m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00439         }
00440         delete[] my_svm_values_unnormalized;
00441     }
00442     //for (int32_t j=0; j<m_num_degrees; j++)
00443     //  delete[] m_wordstr[0][j] ;
00444     //delete[] m_wordstr[0] ;
00445 }
00446 
00447 void CDynProg::set_p_vector(float64_t *p, int32_t N)
00448 {
00449     if (!(N==m_N))
00450         SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00451 
00452     m_initial_state_distribution_p.set_array(p, N, true, true);
00453 }
00454 
00455 void CDynProg::set_q_vector(float64_t *q, int32_t N)
00456 {
00457     if (!(N==m_N))
00458         SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",N, m_N);
00459     m_end_state_distribution_q.set_array(q, N, true, true);
00460 }
00461 
00462 void CDynProg::set_a(float64_t *a, int32_t M, int32_t N)
00463 {
00464     ASSERT(N==m_N);
00465     ASSERT(M==N);
00466     m_transition_matrix_a.set_array(a, N, N, true, true);
00467     m_transition_matrix_a_deriv.resize_array(N, N);
00468 }
00469 
00470 void CDynProg::set_a_id(int32_t *a, int32_t M, int32_t N)
00471 {
00472     ASSERT(N==m_N);
00473     ASSERT(M==N);
00474     m_transition_matrix_a_id.set_array(a, N, N, true, true);
00475     m_max_a_id = 0;
00476     for (int32_t i=0; i<N; i++)
00477     {
00478         for (int32_t j=0; j<N; j++)
00479             m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00480     }
00481 }
00482 
00483 void CDynProg::set_a_trans_matrix(
00484     float64_t *a_trans, int32_t num_trans, int32_t num_cols)
00485 {
00486 
00487     //CMath::display_matrix(a_trans,num_trans, num_cols,"a_trans");
00488 
00489     if (!((num_cols==3) || (num_cols==4)))
00490         SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00491 
00492     delete[] trans_list_forward ;
00493     delete[] trans_list_forward_cnt ;
00494     delete[] trans_list_forward_val ;
00495     delete[] trans_list_forward_id ;
00496 
00497     trans_list_forward = NULL ;
00498     trans_list_forward_cnt = NULL ;
00499     trans_list_forward_val = NULL ;
00500     trans_list_len = 0 ;
00501 
00502     m_transition_matrix_a.zero() ;
00503     m_transition_matrix_a_id.zero() ;
00504 
00505     mem_initialized = true ;
00506 
00507     trans_list_forward_cnt=NULL ;
00508     trans_list_len = m_N ;
00509     trans_list_forward = new T_STATES*[m_N] ;
00510     trans_list_forward_cnt = new T_STATES[m_N] ;
00511     trans_list_forward_val = new float64_t*[m_N] ;
00512     trans_list_forward_id = new int32_t*[m_N] ;
00513     
00514     int32_t start_idx=0;
00515     for (int32_t j=0; j<m_N; j++)
00516     {
00517         int32_t old_start_idx=start_idx;
00518 
00519         while (start_idx<num_trans && a_trans[start_idx+num_trans]==j)
00520         {
00521             start_idx++;
00522             
00523             if (start_idx>1 && start_idx<num_trans)
00524                 ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00525         }
00526         
00527         if (start_idx>1 && start_idx<num_trans)
00528             ASSERT(a_trans[start_idx+num_trans-1] <= a_trans[start_idx+num_trans]);
00529         
00530         int32_t len=start_idx-old_start_idx;
00531         ASSERT(len>=0);
00532         
00533         trans_list_forward_cnt[j] = 0 ;
00534         
00535         if (len>0)
00536         {
00537             trans_list_forward[j]     = new T_STATES[len] ;
00538             trans_list_forward_val[j] = new float64_t[len] ;
00539             trans_list_forward_id[j] = new int32_t[len] ;
00540         }
00541         else
00542         {
00543             trans_list_forward[j]     = NULL;
00544             trans_list_forward_val[j] = NULL;
00545             trans_list_forward_id[j]  = NULL;
00546         }
00547     }
00548     
00549     for (int32_t i=0; i<num_trans; i++)
00550     {
00551         int32_t from_state   = (int32_t)a_trans[i] ;
00552         int32_t to_state = (int32_t)a_trans[i+num_trans] ;
00553         float64_t val = a_trans[i+num_trans*2] ;
00554         int32_t id = 0 ;
00555         if (num_cols==4)
00556             id = (int32_t)a_trans[i+num_trans*3] ;
00557         //SG_DEBUG( "id=%i\n", id) ;
00558             
00559         ASSERT(to_state>=0 && to_state<m_N);
00560         ASSERT(from_state>=0 && from_state<m_N);
00561         
00562         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00563         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00564         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00565         trans_list_forward_cnt[to_state]++ ;
00566         m_transition_matrix_a.element(from_state, to_state) = val ;
00567         m_transition_matrix_a_id.element(from_state, to_state) = id ;
00568         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state));
00569     } ;
00570 
00571     m_max_a_id = 0 ;
00572     for (int32_t i=0; i<m_N; i++)
00573         for (int32_t j=0; j<m_N; j++)
00574         {
00575             //if (m_transition_matrix_a_id.element(i,j))
00576             //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ;
00577             m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00578         }
00579     //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ;
00580 }
00581 
00582 
00583 void CDynProg::init_mod_words_array(
00584     int32_t * mod_words_input, int32_t num_elem, int32_t num_columns)
00585 {
00586     //for (int32_t i=0; i<num_columns; i++)
00587     //{
00588     //  for (int32_t j=0; j<num_elem; j++)
00589     //      SG_PRINT("%i ",mod_words_input[i*num_elem+j]);
00590     //  SG_PRINT("\n");
00591     //}
00592     m_svm_arrays_clean=false ;
00593 
00594     ASSERT(m_num_svms==num_elem);
00595     ASSERT(num_columns==2);
00596 
00597     m_mod_words.set_array(mod_words_input, num_elem, 2, true, true) ;
00598     m_mod_words_array = m_mod_words.get_array() ;
00599     
00600     /*SG_DEBUG( "m_mod_words=[") ;
00601     for (int32_t i=0; i<num_elem; i++)
00602         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00603         SG_DEBUG( "]\n") ;*/
00604 } 
00605 
00606 bool CDynProg::check_svm_arrays()
00607 {
00608     //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings);
00609     if ((m_word_degree.get_dim1()==m_num_degrees) &&
00610             (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00611             (m_num_words.get_dim1()==m_num_degrees) &&
00612             //(word_used.get_dim1()==m_num_degrees) &&
00613             //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) &&
00614             //(word_used.get_dim3()==m_num_strings) &&
00615             //      (svm_values_unnormalized.get_dim1()==m_num_degrees) &&
00616             //      (svm_values_unnormalized.get_dim2()==m_num_svms) &&
00617             //(m_svm_pos_start.get_dim1()==m_num_degrees) &&
00618             (m_num_unique_words.get_dim1()==m_num_degrees) &&
00619             (m_mod_words.get_dim1()==m_num_svms) &&
00620             (m_mod_words.get_dim2()==2) && 
00621             (m_sign_words.get_dim1()==m_num_svms) &&
00622             (m_string_words.get_dim1()==m_num_svms))
00623     {
00624         m_svm_arrays_clean=true ;
00625         return true ;
00626     }
00627     else
00628     {
00629         if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00630             (m_mod_words.get_dim1()==m_num_svms) &&
00631             (m_mod_words.get_dim2()==2) &&
00632             (m_sign_words.get_dim1()==m_num_svms) &&
00633             (m_string_words.get_dim1()==m_num_svms))
00634             SG_PRINT("OK\n") ;
00635         else
00636             SG_PRINT("not OK\n") ;
00637 
00638         if (!(m_word_degree.get_dim1()==m_num_degrees))
00639             SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00640         if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00641             SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00642         if (!(m_num_words.get_dim1()==m_num_degrees))
00643             SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00644         //if (!(m_svm_pos_start.get_dim1()==m_num_degrees))
00645         //  SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ;
00646         if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00647             SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00648         if (!(m_mod_words.get_dim1()==m_num_svms))
00649             SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00650         if (!(m_mod_words.get_dim2()==2))
00651             SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00652         if (!(m_sign_words.get_dim1()==m_num_svms))
00653             SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00654         if (!(m_string_words.get_dim1()==m_num_svms))
00655             SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00656 
00657         m_svm_arrays_clean=false ;
00658         return false ;  
00659     }
00660 }
00661 
00662 void CDynProg::set_observation_matrix(float64_t* seq, int32_t* dims, int32_t ndims)
00663 {
00664     if (ndims!=3)
00665         SG_ERROR("Expected 3-dimensional Matrix\n");
00666 
00667     int32_t N=dims[0];
00668     int32_t cand_pos=dims[1];
00669     int32_t max_num_features=dims[2];
00670 
00671     if (!m_svm_arrays_clean)
00672     {
00673         SG_ERROR( "SVM arrays not clean") ;
00674         return ;
00675     } ;
00676 
00677     ASSERT(N==m_N);
00678     ASSERT(cand_pos==m_seq_len);
00679     ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00680     ASSERT(m_end_state_distribution_q.get_dim1()==N);
00681     
00682     m_observation_matrix.set_array(seq, N, m_seq_len, max_num_features, true, true) ;
00683 }
00684 int32_t CDynProg::get_num_positions()
00685 {
00686     return m_seq_len;
00687 }
00688 
00689 void CDynProg::set_content_type_array(float64_t* seg_path, int32_t rows, int32_t cols)
00690 {
00691     ASSERT(rows==2);
00692     ASSERT(cols==m_seq_len);
00693 
00694     if (seg_path!=NULL)
00695     {
00696         int32_t *segment_ids = new int32_t[m_seq_len] ;
00697         float64_t *segment_mask = new float64_t[m_seq_len] ;
00698         for (int32_t i=0; i<m_seq_len; i++)
00699         {
00700                 segment_ids[i] = (int32_t)seg_path[2*i] ;
00701                 segment_mask[i] = seg_path[2*i+1] ;
00702         }
00703         best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00704         delete[] segment_ids;
00705         delete[] segment_mask;
00706     }
00707     else
00708     {
00709         int32_t *izeros = new int32_t[m_seq_len] ;
00710         float64_t *dzeros = new float64_t[m_seq_len] ;
00711         for (int32_t i=0; i<m_seq_len; i++)
00712         {
00713             izeros[i]=0 ;
00714             dzeros[i]=0.0 ;
00715         }
00716         best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00717         delete[] izeros ;
00718         delete[] dzeros ;
00719     }
00720 }
00721 
00722 void CDynProg::set_pos(int32_t* pos, int32_t seq_len)  
00723 {
00724     //if (seq_len!=m_observation_matrix.get_dim2())
00725     //  SG_ERROR( "pos size does not match previous info %i!=%i\n", seq_len, m_observation_matrix.get_dim2()) ;
00726     
00727     m_pos.set_array(pos, seq_len, true, true) ;
00728     m_seq_len = seq_len;
00729 }
00730 
00731 void CDynProg::set_orf_info(int32_t* orf_info, int32_t m, int32_t n) 
00732 {
00733     if (n!=2)
00734         SG_ERROR( "orf_info size incorrect %i!=2\n", n) ;
00735 
00736     m_orf_info.set_array(orf_info, m, n, true, true) ;
00737     m_orf_info.set_name("orf_info") ;
00738 }
00739 
00740 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00741 {
00742     if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00743         SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00744 
00745     SG_UNREF(m_seq_sparse1);
00746     SG_UNREF(m_seq_sparse2);
00747 
00748     m_seq_sparse1=seq_sparse1;
00749     m_seq_sparse2=seq_sparse2;
00750     SG_REF(m_seq_sparse1);
00751     SG_REF(m_seq_sparse2);
00752 }
00753 
00754 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00755 {
00756     SG_UNREF(m_plif_matrices);
00757 
00758     m_plif_matrices=pm;
00759 
00760     SG_REF(m_plif_matrices);
00761 }
00762 
00763 void CDynProg::set_gene_string(char* genestr, int32_t genestr_len)
00764 {
00765     ASSERT(genestr);
00766     ASSERT(genestr_len>0);
00767 
00768     m_genestr.set_array(genestr, genestr_len, true, true) ;
00769 }
00770 
00771 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00772 {
00773     ASSERT(my_state_seq && m_seq_len>0);
00774     m_my_state_seq.resize_array(m_seq_len);
00775     for (int32_t i=0; i<m_seq_len; i++)
00776         m_my_state_seq[i]=my_state_seq[i];
00777 }
00778 
00779 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00780 {
00781     ASSERT(my_pos_seq && m_seq_len>0);
00782     m_my_pos_seq.resize_array(m_seq_len);
00783     for (int32_t i=0; i<m_seq_len; i++)
00784         m_my_pos_seq[i]=my_pos_seq[i];
00785 }
00786 
00787 void CDynProg::set_dict_weights(
00788     float64_t* dictionary_weights, int32_t dict_len, int32_t n)
00789 {
00790     if (m_num_svms!=n)
00791         SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n", m_num_svms, n) ;
00792 
00793     m_dict_weights.set_array(dictionary_weights, dict_len, m_num_svms, true, true) ;
00794 
00795     // initialize, so it does not bother when not used
00796     m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00797     m_segment_loss.zero() ;
00798     m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00799     m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00800     m_segment_ids.zero() ;
00801     m_segment_mask.zero() ;
00802 }
00803 
00804 void CDynProg::best_path_set_segment_loss(
00805     float64_t* segment_loss, int32_t m, int32_t n)
00806 {
00807     // here we need two matrices. Store it in one: 2N x N
00808     if (2*m!=n)
00809         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00810 
00811     if (m!=m_max_a_id+1)
00812         SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00813 
00814     m_segment_loss.set_array(segment_loss, m, n/2, 2, true, true) ;
00815     /*for (int32_t i=0; i<n; i++)
00816         for (int32_t j=0; j<n; j++)
00817         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00818 }
00819 
00820 void CDynProg::best_path_set_segment_ids_mask(
00821     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00822 {
00823 
00824     if (m!=m_observation_matrix.get_dim2())
00825         SG_ERROR("size of segment_ids or segment_mask (%i)  does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2());
00826     int32_t max_id = 0;
00827     for (int32_t i=1;i<m;i++)
00828         max_id = CMath::max(max_id,segment_ids[i]);
00829     //SG_PRINT("max_id: %i, m:%i\n",max_id, m);     
00830     m_segment_ids.set_array(segment_ids, m, true, true) ;
00831     m_segment_ids.set_name("m_segment_ids");
00832     m_segment_mask.set_array(segment_mask, m, true, true) ;
00833     m_segment_mask.set_name("m_segment_mask");
00834     
00835     m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00836     m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00837     m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00838 }
00839 
00840 void CDynProg::get_scores(float64_t **scores, int32_t *m) 
00841 {
00842    ASSERT(scores && m);
00843 
00844    //*scores=m_scores.get_array();
00845    *m=m_scores.get_dim1();
00846 
00847    int32_t sz = sizeof(float64_t)*(*m);
00848 
00849    *scores = (float64_t*) malloc(sz);
00850    ASSERT(*scores);
00851 
00852    memcpy(*scores,m_scores.get_array(),sz);
00853 }
00854 
00855 void CDynProg::get_states(int32_t **states, int32_t *m, int32_t *n) 
00856 {
00857    ASSERT(states && m && n);
00858 
00859     *m=m_states.get_dim1() ;
00860     *n=m_states.get_dim2() ;
00861 
00862    int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00863 
00864    *states = (int32_t*) malloc(sz);
00865    ASSERT(*states);
00866 
00867    memcpy(*states,m_states.get_array(),sz);
00868 }
00869 
00870 void CDynProg::get_positions(int32_t **positions, int32_t *m, int32_t *n)
00871 {
00872    ASSERT(positions && m && n);
00873 
00874     *m=m_positions.get_dim1() ;
00875     *n=m_positions.get_dim2() ;
00876 
00877    int32_t sz = sizeof(int32_t)*( (*m) * (*n) );
00878 
00879    *positions = (int32_t*) malloc(sz);
00880    ASSERT(*positions);
00881 
00882    memcpy(*positions,m_positions.get_array(),sz);
00883 }
00884 
00885 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00886 {
00887    ASSERT(scores && seq_len);
00888 
00889    *seq_len=m_my_scores.get_dim1();
00890 
00891    int32_t sz = sizeof(float64_t)*(*seq_len);
00892 
00893    *scores = (float64_t*) malloc(sz);
00894    ASSERT(*scores);
00895 
00896    memcpy(*scores,m_my_scores.get_array(),sz);
00897 }
00898 
00899 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00900 {
00901     ASSERT(losses && seq_len);
00902 
00903     *seq_len=m_my_losses.get_dim1();
00904 
00905    int32_t sz = sizeof(float64_t)*(*seq_len);
00906 
00907    *losses = (float64_t*) malloc(sz);
00908    ASSERT(*losses);
00909 
00910    memcpy(*losses,m_my_losses.get_array(),sz);
00911 }
00912 
00914 
00915 bool CDynProg::extend_orf(
00916     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
00917     int32_t to)
00918 {
00919 #ifdef DYNPROG_TIMING_DETAIL
00920     MyTime.start() ;
00921 #endif
00922     
00923     if (start<0) 
00924         start=0 ;
00925     if (to<0)
00926         to=0 ;
00927     
00928     int32_t orf_target = orf_to-orf_from ;
00929     if (orf_target<0) orf_target+=3 ;
00930     
00931     int32_t pos ;
00932     if (last_pos==to)
00933         pos = to-orf_to-3 ;
00934     else
00935         pos=last_pos ;
00936 
00937     if (pos<0)
00938     {
00939 #ifdef DYNPROG_TIMING_DETAIL
00940         MyTime.stop() ;
00941         orf_time += MyTime.time_diff_sec() ;
00942 #endif
00943         return true ;
00944     }
00945     
00946     for (; pos>=start; pos-=3)
00947         if (m_genestr_stop[pos])
00948         {
00949 #ifdef DYNPROG_TIMING_DETAIL
00950             MyTime.stop() ;
00951             orf_time += MyTime.time_diff_sec() ;
00952 #endif
00953             return false ;
00954         }
00955     
00956     
00957     last_pos = CMath::min(pos+3,to-orf_to-3) ;
00958 
00959 #ifdef DYNPROG_TIMING_DETAIL
00960     MyTime.stop() ;
00961     orf_time += MyTime.time_diff_sec() ;
00962 #endif
00963     return true ;
00964 }
00965 
00966 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
00967         int16_t nbest, bool with_loss, bool with_multiple_sequences)
00968     {
00969 
00970     //FIXME we need checks here if all the fields are of right size
00971     //SG_PRINT("m_seq_len: %i\n", m_seq_len);
00972     //SG_PRINT("m_pos[0]: %i\n", m_pos[0]);
00973     //SG_PRINT("\n");
00974 
00975     //FIXME these variables can go away when compute_nbest_paths uses them
00976     //instead of the local pointers below
00977     const float64_t* seq_array = m_observation_matrix.get_array();
00978     m_scores.resize_array(nbest) ;
00979     m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00980     m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00981 
00982     for (int32_t i=0; i<nbest; i++)
00983     {
00984         m_scores[i]=-1;
00985         for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
00986         {
00987             m_states.element(i,j)=-1;
00988             m_positions.element(i,j)=-1;
00989         }
00990     }
00991     float64_t* prob_nbest=m_scores.get_array();
00992     int32_t* my_state_seq=m_states.get_array();
00993     int32_t* my_pos_seq=m_positions.get_array();
00994 
00995     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
00996     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
00997     //END FIXME
00998 
00999 
01000 #ifdef DYNPROG_TIMING
01001         segment_init_time = 0.0 ;
01002         segment_pos_time = 0.0 ;
01003         segment_extend_time = 0.0 ;
01004         segment_clean_time = 0.0 ;
01005         orf_time = 0.0 ;
01006         svm_init_time = 0.0 ;
01007         svm_pos_time = 0.0 ;
01008         svm_clean_time = 0.0 ;
01009         inner_loop_time = 0.0 ;
01010         content_svm_values_time = 0.0 ;
01011         content_plifs_time = 0.0 ;
01012         inner_loop_max_time = 0.0 ;
01013         long_transition_time = 0.0 ;
01014 
01015         MyTime2.start() ;
01016 #endif
01017 
01018         if (!m_svm_arrays_clean)
01019         {
01020             SG_ERROR( "SVM arrays not clean") ;
01021             return ;
01022         }
01023 
01024 #ifdef DYNPROG_DEBUG
01025         m_transition_matrix_a.set_name("transition_matrix");
01026         m_transition_matrix_a.display_array();
01027         m_mod_words.display_array() ;
01028         m_sign_words.display_array() ;
01029         m_string_words.display_array() ;
01030         //SG_PRINT("use_orf = %i\n", use_orf) ;
01031 #endif
01032 
01033         int32_t max_look_back = 1000 ;
01034         bool use_svm = false ;
01035 
01036         SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01037 
01038         //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++)
01039       //   SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
01040 
01041         CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
01042         PEN.set_name("PEN");
01043         CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
01044         PEN_state_signals.set_name("state_signals");
01045 
01046         CArray2<float64_t> seq(m_N, m_seq_len) ;
01047         seq.set_name("seq") ;
01048         seq.zero() ;
01049 
01050 #ifdef DYNPROG_DEBUG
01051         SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01052         SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01053         SG_PRINT("m_num_svms: %i\n", m_num_svms);
01054         SG_PRINT("m_num_lin_feat_plifs_cum: ");
01055         for (int i=0; i<=m_num_raw_data; i++)
01056             SG_PRINT(" %i  ",m_num_lin_feat_plifs_cum[i]);
01057         SG_PRINT("\n");
01058 #endif
01059 
01060         float64_t* svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01061         { // initialize svm_svalue
01062             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01063                 svm_value[s]=0 ;
01064         }
01065 
01066         { // convert seq_input to seq
01067             // this is independent of the svm values 
01068 
01069             //CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
01070             CArray3<float64_t> *seq_input=NULL ;
01071             if (seq_array!=NULL)
01072             {
01073                 //SG_PRINT("using dense seq_array\n") ;
01074 
01075                 seq_input=new CArray3<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01076                 seq_input->set_name("seq_input") ;
01077                 //seq_input.display_array() ;
01078 
01079                 ASSERT(m_seq_sparse1==NULL) ;
01080                 ASSERT(m_seq_sparse2==NULL) ;
01081             } else
01082             {
01083                 SG_PRINT("using sparse seq_array\n") ;
01084 
01085                 ASSERT(m_seq_sparse1!=NULL) ;
01086                 ASSERT(m_seq_sparse2!=NULL) ;
01087                 ASSERT(max_num_signals==2) ;
01088             }
01089 
01090             for (int32_t i=0; i<m_N; i++)
01091                 for (int32_t j=0; j<m_seq_len; j++)
01092                     seq.element(i,j) = 0 ;
01093 
01094             for (int32_t i=0; i<m_N; i++)
01095                 for (int32_t j=0; j<m_seq_len; j++)
01096                     for (int32_t k=0; k<max_num_signals; k++)
01097                     {
01098                         if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01099                         {
01100                             // no plif
01101                             if (seq_input!=NULL)
01102                                 seq.element(i,j) = seq_input->element(i,j,k) ;
01103                             else
01104                             {
01105                                 if (k==0)
01106                                     seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01107                                 if (k==1)
01108                                     seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01109                             }
01110                             break ;
01111                         }
01112                         if (PEN_state_signals.element(i,k)!=NULL)
01113                         {
01114                             if (seq_input!=NULL)
01115                             {
01116                                 // just one plif
01117                                 if (CMath::is_finite(seq_input->element(i,j,k)))
01118                                     seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01119                                 else
01120                                     // keep infinity values
01121                                     seq.element(i,j) = seq_input->element(i, j, k) ;
01122                             }
01123                             else
01124                             {
01125                                 if (k==0)
01126                                 {
01127                                     // just one plif
01128                                     if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01129                                         seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01130                                     else
01131                                         // keep infinity values
01132                                         seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01133                                 }
01134                                 if (k==1)
01135                                 {
01136                                     // just one plif
01137                                     if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01138                                         seq.element(i,j) += PEN_state_signals.element(i,k)->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01139                                     else
01140                                         // keep infinity values
01141                                         seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01142                                 }
01143                             }
01144                         } 
01145                         else
01146                             break ;
01147                     }
01148             delete seq_input;
01149             delete[] svm_value;
01150         }
01151 
01152         // allow longer transitions than look_back
01153         bool long_transitions = m_long_transitions ;
01154         CArray2<int32_t> long_transition_content_start_position(m_N,m_N) ;
01155         long_transition_content_start_position.set_name("long_transition_content_start_position");
01156 #ifdef DYNPROG_DEBUG
01157         CArray2<int32_t> long_transition_content_end_position(m_N,m_N) ;
01158         long_transition_content_end_position.set_name("long_transition_content_end_position");
01159 #endif
01160         CArray2<int32_t> long_transition_content_start(m_N,m_N) ;
01161         long_transition_content_start.set_name("long_transition_content_start");
01162         CArray2<float64_t> long_transition_content_scores(m_N,m_N) ;
01163         long_transition_content_scores.set_name("long_transition_content_scores");
01164 #ifdef DYNPROG_DEBUG
01165         CArray2<float64_t> long_transition_content_scores_pen(m_N,m_N) ;
01166         long_transition_content_scores_pen.set_name("long_transition_content_scores_pen");
01167         CArray2<float64_t> long_transition_content_scores_prev(m_N,m_N) ;
01168         long_transition_content_scores_prev.set_name("long_transition_content_scores_prev");
01169         CArray2<float64_t> long_transition_content_scores_elem(m_N,m_N) ;
01170         long_transition_content_scores_elem.set_name("long_transition_content_scores_elem");
01171 #endif      
01172         CArray2<float64_t> long_transition_content_scores_loss(m_N,m_N) ;
01173         long_transition_content_scores_loss.set_name("long_transition_content_scores_loss");
01174 
01175         if (nbest!=1)
01176         {
01177             SG_ERROR("Long transitions are not supported for nbest!=1") ;
01178             long_transitions = false ;
01179         }
01180         long_transition_content_scores.set_const(-CMath::INFTY);
01181 #ifdef DYNPROG_DEBUG
01182         long_transition_content_scores_pen.set_const(0) ;
01183         long_transition_content_scores_elem.set_const(0) ;
01184         long_transition_content_scores_prev.set_const(0) ;
01185 #endif
01186         if (with_loss)
01187             long_transition_content_scores_loss.set_const(0) ;
01188         long_transition_content_start.zero() ;
01189         long_transition_content_start_position.zero() ;
01190 #ifdef DYNPROG_DEBUG
01191         long_transition_content_end_position.zero() ;
01192 #endif
01193 
01194         svm_value = new float64_t [m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
01195         { // initialize svm_svalue
01196             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01197                 svm_value[s]=0 ;
01198         }
01199 
01200         CArray2<int32_t> look_back(m_N,m_N) ;
01201         look_back.set_name("look_back");
01202         //CArray2<int32_t> look_back_orig(m_N,m_N) ;
01203         //look_back.set_name("look_back_orig");
01204 
01205 
01206         { // determine maximal length of look-back
01207             for (int32_t i=0; i<m_N; i++)
01208                 for (int32_t j=0; j<m_N; j++)
01209                 {
01210                     look_back.set_element(INT_MAX, i, j) ;
01211                     //look_back_orig.set_element(INT_MAX, i, j) ;
01212                 }
01213 
01214             for (int32_t j=0; j<m_N; j++)
01215             {
01216                 // only consider transitions that are actually allowed
01217                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01218                 const T_STATES *elem_list = trans_list_forward[j] ;
01219 
01220                 for (int32_t i=0; i<num_elem; i++)
01221                 {
01222                     T_STATES ii = elem_list[i] ;
01223 
01224                     CPlifBase *penij=PEN.element(j, ii) ;
01225                     if (penij==NULL)
01226                     {
01227                         if (long_transitions)
01228                         {
01229                             look_back.set_element(m_long_transition_threshold, j, ii) ;
01230                             //look_back_orig.set_element(m_long_transition_max, j, ii) ;
01231                         }
01232                         continue ;
01233                     }
01234 
01235                     /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */
01236                     if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01237                     {
01238                         look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01239                         //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01240                         if (CMath::ceil(penij->get_max_value()) > max_look_back)
01241                         {
01242                             SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01243                             max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01244                         }
01245                     }
01246                     else
01247                     {
01248                         look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01249                         //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
01250                     }
01251                     
01252                     if (penij->uses_svm_values())
01253                         use_svm=true ;
01254                 }
01255             }
01256             /* make sure max_look_back is at least as long as a long transition */
01257             if (long_transitions)
01258                 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01259 
01260             /* make sure max_look_back is not longer than the whole string */
01261             max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01262 
01263             int32_t num_long_transitions = 0 ;
01264             for (int32_t i=0; i<m_N; i++)
01265                 for (int32_t j=0; j<m_N; j++)
01266                 {
01267                     if (look_back.get_element(i,j)==m_long_transition_threshold)
01268                         num_long_transitions++ ;
01269                     if (look_back.get_element(i,j)==INT_MAX)
01270                     {
01271                         if (long_transitions)
01272                         {
01273                             look_back.set_element(m_long_transition_threshold, i, j) ;
01274                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01275                         }
01276                         else
01277                         {
01278                             look_back.set_element(max_look_back, i, j) ;
01279                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01280                         }
01281                     }
01282                 }
01283             SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01284         }
01285         //SG_PRINT("max_look_back: %i \n", max_look_back) ;
01286 
01287         //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ;
01288         SG_DEBUG("use_svm=%i\n", use_svm) ;
01289 
01290         SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01291         const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01292         SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01293         /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01294           look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01295           m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01296           m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/
01297 
01298         //bool is_big = (mem_use>200) || (m_seq_len>5000) ;
01299 
01300         /*if (is_big)
01301           {
01302           SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n", 
01303           m_seq_len, m_N, max_look_back, nbest) ;
01304           SG_DEBUG("allocating %1.2fMB of memory\n", 
01305           mem_use) ;
01306           }*/
01307         ASSERT(nbest<32000) ;
01308 
01309 
01310 
01311         CArray3<float64_t> delta(m_seq_len, m_N, nbest) ;
01312         delta.set_name("delta");
01313         float64_t* delta_array = delta.get_array() ;
01314         //delta.zero() ;
01315 
01316         CArray3<T_STATES> psi(m_seq_len, m_N, nbest) ;
01317         psi.set_name("psi");
01318         //psi.zero() ;
01319 
01320         CArray3<int16_t> ktable(m_seq_len, m_N, nbest) ;
01321         ktable.set_name("ktable");
01322         //ktable.zero() ;
01323 
01324         CArray3<int32_t> ptable(m_seq_len, m_N, nbest) ;    
01325         ptable.set_name("ptable");
01326         //ptable.zero() ;
01327 
01328         CArray<float64_t> delta_end(nbest) ;
01329         delta_end.set_name("delta_end");
01330         //delta_end.zero() ;
01331 
01332         CArray<T_STATES> path_ends(nbest) ;
01333         path_ends.set_name("path_ends");
01334         //path_ends.zero() ;
01335 
01336         CArray<int16_t> ktable_end(nbest) ;
01337         ktable_end.set_name("ktable_end");
01338         //ktable_end.zero() ;
01339 
01340         float64_t * fixedtempvv=new float64_t[look_back_buflen] ;
01341         memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01342         int32_t * fixedtempii=new int32_t[look_back_buflen] ;
01343         memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01344 
01345         CArray<float64_t> oldtempvv(look_back_buflen) ;
01346         oldtempvv.set_name("oldtempvv");
01347         CArray<float64_t> oldtempvv2(look_back_buflen) ;
01348         oldtempvv2.set_name("oldtempvv2");
01349         //oldtempvv.zero() ;
01350         //oldtempvv.display_size() ;
01351 
01352         CArray<int32_t> oldtempii(look_back_buflen) ;
01353         oldtempii.set_name("oldtempii");
01354         CArray<int32_t> oldtempii2(look_back_buflen) ;
01355         oldtempii2.set_name("oldtempii2");
01356         //oldtempii.zero() ;
01357 
01358         CArray<T_STATES> state_seq(m_seq_len) ;
01359         state_seq.set_name("state_seq");
01360         //state_seq.zero() ;
01361 
01362         CArray<int32_t> pos_seq(m_seq_len) ;
01363         pos_seq.set_name("pos_seq");
01364         //pos_seq.zero() ;
01365 
01366 
01367         m_dict_weights.set_name("dict_weights") ;
01368         m_word_degree.set_name("word_degree") ;
01369         m_cum_num_words.set_name("cum_num_words") ;
01370         m_num_words.set_name("num_words") ;
01371         //word_used.set_name("word_used") ;
01372         //svm_values_unnormalized.set_name("svm_values_unnormalized") ;
01373         //m_svm_pos_start.set_name("svm_pos_start") ;
01374         m_num_unique_words.set_name("num_unique_words") ;
01375 
01376         PEN.set_name("PEN") ;
01377         seq.set_name("seq") ;
01378 
01379         delta.set_name("delta") ;
01380         psi.set_name("psi") ;
01381         ktable.set_name("ktable") ;
01382         ptable.set_name("ptable") ;
01383         delta_end.set_name("delta_end") ;
01384         path_ends.set_name("path_ends") ;
01385         ktable_end.set_name("ktable_end") ;
01386 
01387 #ifdef USE_TMP_ARRAYCLASS
01388         fixedtempvv.set_name("fixedtempvv") ;
01389         fixedtempii.set_name("fixedtempvv") ;
01390 #endif
01391 
01392         oldtempvv.set_name("oldtempvv") ;
01393         oldtempvv2.set_name("oldtempvv2") ;
01394         oldtempii.set_name("oldtempii") ;
01395         oldtempii2.set_name("oldtempii2") ;
01396 
01397 
01399 
01400 #ifdef DYNPROG_DEBUG
01401         state_seq.display_size() ;
01402         pos_seq.display_size() ;
01403 
01404         m_dict_weights.display_size() ;
01405         m_word_degree.display_array() ;
01406         m_cum_num_words.display_array() ;
01407         m_num_words.display_array() ;
01408         //word_used.display_size() ;
01409         //svm_values_unnormalized.display_size() ;
01410         //m_svm_pos_start.display_array() ;
01411         m_num_unique_words.display_array() ;
01412 
01413         PEN.display_size() ;
01414         PEN_state_signals.display_size() ;
01415         seq.display_size() ;
01416         m_orf_info.display_size() ;
01417 
01418         //m_genestr_stop.display_size() ;
01419         delta.display_size() ;
01420         psi.display_size() ;
01421         ktable.display_size() ;
01422         ptable.display_size() ;
01423         delta_end.display_size() ;
01424         path_ends.display_size() ;
01425         ktable_end.display_size() ;
01426 
01427 #ifdef USE_TMP_ARRAYCLASS
01428         fixedtempvv.display_size() ;
01429         fixedtempii.display_size() ;
01430 #endif
01431 
01432         //oldtempvv.display_size() ;
01433         //oldtempii.display_size() ;
01434 
01435         state_seq.display_size() ;
01436         pos_seq.display_size() ;
01437 
01438         //seq.zero() ;
01439 
01440 #endif //DYNPROG_DEBUG
01441 
01443 
01444 
01445 
01446         {
01447             for (int32_t s=0; s<m_num_svms; s++)
01448                 ASSERT(m_string_words_array[s]<1)  ;
01449         }
01450 
01451 
01452         //CArray2<int32_t*> trans_matrix_svms(m_N,m_N);
01453         //CArray2<int32_t> trans_matrix_num_svms(m_N,m_N);
01454 
01455         { // initialization
01456 
01457             for (T_STATES i=0; i<m_N; i++)
01458             {
01459                 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01460                 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01461                 psi.element(0,i,0)   = 0 ;
01462                 if (nbest>1)
01463                     ktable.element(0,i,0)  = 0 ;
01464                 ptable.element(0,i,0)  = 0 ;
01465                 for (int16_t k=1; k<nbest; k++)
01466                 {
01467                     int32_t dim1, dim2, dim3 ;
01468                     delta.get_array_size(dim1, dim2, dim3) ;
01469                     //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
01470                     //delta.element(0, i, k)    = -CMath::INFTY ;
01471                     delta.element(delta_array, 0, i, k, m_seq_len, m_N)    = -CMath::INFTY ;
01472                     psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
01473                     if (nbest>1)
01474                         ktable.element(0,i,k)     = 0 ;
01475                     ptable.element(0,i,k)     = 0 ;
01476                 }
01477                 /*
01478                    for (T_STATES j=0; j<m_N; j++)
01479                    {
01480                    CPlifBase * penalty = PEN.element(i,j) ;
01481                    int32_t num_current_svms=0;
01482                    int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
01483                    if (penalty)
01484                    {
01485                    SG_PRINT("trans %i -> %i \n",i,j);
01486                    penalty->get_used_svms(&num_current_svms, svm_ids);
01487                    trans_matrix_svms.set_element(svm_ids,i,j);
01488                    for (int32_t l=0;l<num_current_svms;l++)
01489                    SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
01490                    trans_matrix_num_svms.set_element(num_current_svms,i,j);
01491                    }
01492                    }
01493                    */
01494 
01495             }
01496         }
01497 
01498         SG_DEBUG("START_RECURSION \n\n");
01499 
01500         // recursion
01501         for (int32_t t=1; t<m_seq_len; t++)
01502         {
01503             //if (is_big && t%(1+(m_seq_len/1000))==1)
01504             //  SG_PROGRESS(t, 0, m_seq_len);
01505             //SG_PRINT("%i\n", t) ;
01506 
01507             for (T_STATES j=0; j<m_N; j++)
01508             {
01509                 if (seq.element(j,t)<=-1e20)
01510                 { // if we cannot observe the symbol here, then we can omit the rest
01511                     for (int16_t k=0; k<nbest; k++)
01512                     {
01513                         delta.element(delta_array, t, j, k, m_seq_len, m_N)    = seq.element(j,t) ;
01514                         psi.element(t,j,k)         = 0 ;
01515                         if (nbest>1)
01516                             ktable.element(t,j,k)  = 0 ;
01517                         ptable.element(t,j,k)      = 0 ;
01518                     }
01519                 }
01520                 else
01521                 {
01522                     const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01523                     const T_STATES *elem_list = trans_list_forward[j] ;
01524                     const float64_t *elem_val      = trans_list_forward_val[j] ;
01525                     const int32_t *elem_id      = trans_list_forward_id[j] ;
01526 
01527                     int32_t fixed_list_len = 0 ;
01528                     float64_t fixedtempvv_ = CMath::INFTY ;
01529                     int32_t fixedtempii_ = 0 ;
01530                     bool fixedtemplong = false ;
01531 
01532                     for (int32_t i=0; i<num_elem; i++)
01533                     {
01534                         T_STATES ii = elem_list[i] ;
01535 
01536                         const CPlifBase * penalty = PEN.element(j,ii) ;
01537 
01538                         /*int32_t look_back = max_look_back ;
01539                           if (0)
01540                           { // find lookback length
01541                           CPlifBase *pen = (CPlifBase*) penalty ;
01542                           if (pen!=NULL)
01543                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01544                           if (look_back>=1e6)
01545                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01546                           ASSERT(look_back<1e6);
01547                           } */
01548 
01549                         int32_t look_back_ = look_back.element(j, ii) ;
01550 
01551                         int32_t orf_from = m_orf_info.element(ii,0) ;
01552                         int32_t orf_to   = m_orf_info.element(j,1) ;
01553                         if((orf_from!=-1)!=(orf_to!=-1))
01554                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01555                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01556 
01557                         int32_t orf_target = -1 ;
01558                         if (orf_from!=-1)
01559                         {
01560                             orf_target=orf_to-orf_from ;
01561                             if (orf_target<0) 
01562                                 orf_target+=3 ;
01563                             ASSERT(orf_target>=0 && orf_target<3) ;
01564                         }
01565 
01566                         int32_t orf_last_pos = m_pos[t] ;
01567 #ifdef DYNPROG_TIMING
01568                         MyTime3.start() ;
01569 #endif              
01570                         int32_t num_ok_pos = 0 ;
01571                         float64_t last_mval=0 ;
01572                         int32_t last_ts = 0 ;
01573 
01574                         for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01575                         {
01576                             bool ok ;
01577                             //int32_t plen=t-ts;
01578 
01579                             /*for (int32_t s=0; s<m_num_svms; s++)
01580                               if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
01581                               (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
01582                               {
01583                               SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
01584                               }*/
01585 
01586                             if (orf_target==-1)
01587                                 ok=true ;
01588                             else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01589                                 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01590                             else
01591                                 ok=false ;
01592 
01593                             if (ok)
01594                             {
01595 
01596                                 float64_t segment_loss = 0.0 ;
01597                                 if (with_loss)
01598                                 {
01599                                     segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01600                                     //if (segment_loss!=segment_loss2)
01601                                         //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2);
01602                                 }
01604                                 // BEST_PATH_TRANS
01606 
01607                                 int32_t frame = orf_from;//m_orf_info.element(ii,0);
01608                                 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01609 
01610                                 float64_t pen_val = 0.0 ;
01611                                 if (penalty)
01612                                 {
01613 #ifdef DYNPROG_TIMING_DETAIL
01614                                     MyTime.start() ;
01615 #endif                              
01616                                     pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01617 
01618 #ifdef DYNPROG_TIMING_DETAIL
01619                                     MyTime.stop() ;
01620                                     content_plifs_time += MyTime.time_diff_sec() ;
01621 #endif
01622                                 }
01623 
01624 #ifdef DYNPROG_TIMING_DETAIL
01625                                 MyTime.start() ;
01626 #endif                              
01627                                 num_ok_pos++ ;
01628 
01629                                 if (nbest==1)
01630                                 {
01631                                     float64_t  val        = elem_val[i] + pen_val ;
01632                                     if (with_loss)
01633                                         val              += segment_loss ;
01634 
01635                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01636 
01637                                     if (mval<fixedtempvv_)
01638                                     {
01639                                         fixedtempvv_ = mval ;
01640                                         fixedtempii_ = ii + ts*m_N;
01641                                         fixed_list_len = 1 ;
01642                                         fixedtemplong = false ;
01643                                     }
01644                                     last_mval = mval ;
01645                                     last_ts = ts ;
01646                                 }
01647                                 else
01648                                 {
01649                                     for (int16_t diff=0; diff<nbest; diff++)
01650                                     {
01651                                         float64_t  val        = elem_val[i]  ;
01652                                         val                  += pen_val ;
01653                                         if (with_loss)
01654                                             val              += segment_loss ;
01655 
01656                                         float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01657 
01658                                         /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
01659                                         /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
01660                                         /* fixed_list_len has the number of elements in fixedtempvv */
01661 
01662                                         if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01663                                         {
01664                                             if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01665                                             {
01666                                                 fixedtempvv[fixed_list_len] = mval ;
01667                                                 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01668                                                 fixed_list_len++ ;
01669                                             }
01670                                             else  // must have mval < fixedtempvv[fixed_list_len-1]
01671                                             {
01672                                                 int32_t addhere = fixed_list_len;
01673                                                 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01674                                                     addhere--;
01675 
01676                                                 // move everything from addhere+1 one forward 
01677                                                 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01678                                                 {
01679                                                     fixedtempvv[jj] = fixedtempvv[jj-1];
01680                                                     fixedtempii[jj] = fixedtempii[jj-1];
01681                                                 }
01682 
01683                                                 fixedtempvv[addhere] = mval;
01684                                                 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01685 
01686                                                 if (fixed_list_len < nbest)
01687                                                     fixed_list_len++;
01688                                             }
01689                                         }
01690                                     }
01691                                 }
01692 #ifdef DYNPROG_TIMING_DETAIL
01693                                 MyTime.stop() ;
01694                                 inner_loop_max_time += MyTime.time_diff_sec() ;
01695 #endif
01696                             }
01697                         }
01698 #ifdef DYNPROG_TIMING
01699                         MyTime3.stop() ;
01700                         inner_loop_time += MyTime3.time_diff_sec() ;
01701 #endif
01702                     }
01703                     for (int32_t i=0; i<num_elem; i++)
01704                     {
01705                         T_STATES ii = elem_list[i] ;
01706 
01707                         const CPlifBase * penalty = PEN.element(j,ii) ;
01708 
01709                         /*int32_t look_back = max_look_back ;
01710                           if (0)
01711                           { // find lookback length
01712                           CPlifBase *pen = (CPlifBase*) penalty ;
01713                           if (pen!=NULL)
01714                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01715                           if (look_back>=1e6)
01716                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01717                           ASSERT(look_back<1e6);
01718                           } */
01719 
01720                         int32_t look_back_ = look_back.element(j, ii) ;
01721                         //int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
01722 
01723                         int32_t orf_from = m_orf_info.element(ii,0) ;
01724                         int32_t orf_to   = m_orf_info.element(j,1) ;
01725                         if((orf_from!=-1)!=(orf_to!=-1))
01726                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01727                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01728 
01729                         int32_t orf_target = -1 ;
01730                         if (orf_from!=-1)
01731                         {
01732                             orf_target=orf_to-orf_from ;
01733                             if (orf_target<0) 
01734                                 orf_target+=3 ;
01735                             ASSERT(orf_target>=0 && orf_target<3) ;
01736                         }
01737 
01738                         //int32_t loss_last_pos = t ;
01739                         //float64_t last_loss = 0.0 ;
01740 
01741 #ifdef DYNPROG_TIMING
01742                         MyTime3.start() ;
01743 #endif              
01744 
01745                         /* long transition stuff */
01746                         /* only do this, if 
01747                          * this feature is enabled
01748                          * this is not a transition with ORF restrictions
01749                          * the loss is switched off
01750                          * nbest=1
01751                          */ 
01752 #ifdef DYNPROG_TIMING
01753                         MyTime3.start() ;
01754 #endif
01755                         // long transitions, only when not considering ORFs
01756                         if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
01757                         {
01758 
01759                             // update table for 5' part  of the long segment
01760 
01761                             int32_t start = long_transition_content_start.get_element(ii, j) ;
01762                             int32_t end_5p_part = start ;
01763                             for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
01764                             {
01765                                 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away
01766                                 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
01767                                     end_5p_part++ ;
01768 
01769                                 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ;
01770                                 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ;
01771 
01772                                 float64_t pen_val = 0.0;
01773                                 /* recompute penalty, if necessary */
01774                                 if (penalty)
01775                                 {
01776                                     int32_t frame = m_orf_info.element(ii,0);
01777                                     lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part 
01778                                     pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
01779                                 }
01780 
01781                                 /*if (m_pos[start_5p_part]==1003)
01782                                   {
01783                                   SG_PRINT("Part1: %i - %i   vs  %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) ;
01784                                   SG_PRINT("Part1: ts=%i  t=%i  start_5p_part=%i  m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) ;
01785                                   }*/
01786 
01787                                 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
01788                                 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check
01789 
01790                                 float64_t segment_loss_part1=0.0 ;
01791                                 if (with_loss)
01792                                 {  // this is the loss from the start of the long segment (5' part + middle section)
01793 
01794                                     segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure
01795 
01796                                     mval_trans -= segment_loss_part1 ;
01797                                 }
01798 
01799                                 
01800                                 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/)
01801                                 {
01802                                     // this restricts the maximal length of segments, 
01803                                     // but the current implementation is not valid since the 
01804                                     // long transition is discarded without loocking if there 
01805                                     // is a second best long transition in between
01806                                     long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
01807                                     long_transition_content_start_position.set_element(0, ii, j) ;
01808                                     if (with_loss)
01809                                         long_transition_content_scores_loss.set_element(0.0, ii, j) ;
01810 #ifdef DYNPROG_DEBUG
01811                                     long_transition_content_scores_pen.set_element(0.0, ii, j) ;
01812                                     long_transition_content_scores_elem.set_element(0.0, ii, j) ;
01813                                     long_transition_content_scores_prev.set_element(0.0, ii, j) ;
01814                                     long_transition_content_end_position.set_element(0, ii, j) ;
01815 #endif
01816                                 }
01817                                 if (with_loss)
01818                                 {
01819                                     float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
01820                                     float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
01821                                     float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
01822                                     long_transition_content_scores.set_element(score, ii, j) ;
01823                                     long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
01824 #ifdef DYNPROG_DEBUG
01825                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01826 #endif
01827 
01828                                 }
01829                                 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
01830                                 {
01831                                     /* then the old long transition is either too far away or worse than the current one */
01832                                     long_transition_content_scores.set_element(-mval_trans, ii, j) ;
01833                                     long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
01834                                     if (with_loss)
01835                                         long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
01836 #ifdef DYNPROG_DEBUG
01837                                     long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
01838                                     long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
01839                                     long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
01840                                     /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) +
01841                                       long_transition_content_scores_elem.get_element(ii, j) + 
01842                                       long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/
01843                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01844 #endif
01845                                 }
01846                                 //
01847                                 // this sets the position where the search for better 5'parts is started the next time
01848                                 // whithout this the prediction takes ages
01849                                 //
01850                                 long_transition_content_start.set_element(start_5p_part, ii, j) ; 
01851                             }
01852 
01853                             // consider the 3' part at the end of the long segment:
01854                             // * with length = m_long_transition_threshold
01855                             // * content prediction and loss only for this part
01856 
01857                             // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold 
01858                             // precompute: only depends on t
01859                             int ts = t;
01860                             while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
01861                                 ts-- ;
01862 
01863                             if (ts>0)
01864                             {
01865                                 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
01866 
01867 
01868                                 /* only consider this transition, if the right position was found */
01869                                 float pen_val_3p = 0.0 ;
01870                                 if (penalty)
01871                                 {
01872                                     int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
01873                                     lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame); 
01874                                     pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01875                                 }
01876 
01877                                 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
01878                                 
01879                                 {
01880 #ifdef DYNPROG_DEBUG
01881                                     float64_t segment_loss_part2=0.0 ;
01882                                     float64_t segment_loss_part1=0.0 ;
01883 #endif
01884                                     float64_t segment_loss_total=0.0 ;
01885                                     
01886                                     if (with_loss)
01887                                     {   // this is the loss for the 3' end fragment of the segment
01888                                         // (the 5' end and the middle section loss is already contained in mval)
01889                                         
01890 #ifdef DYNPROG_DEBUG
01891                                         // this is an alternative, which should be identical, if the loss is additive
01892                                         segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
01893                                         //mval -= segment_loss_part2 ;
01894                                         segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
01895 #endif
01896                                         segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
01897                                         mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
01898                                     }
01899                                     
01900 #ifdef DYNPROG_DEBUG
01901                                     if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
01902                                     {
01903                                         SG_PRINT("Part2: %i,%i,%i: val=%1.6f  pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i,  loss=%1.1f/%1.1f (%i,%i)\n", 
01904                                                  m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1], 
01905                                                  long_transition_content_scores.get_element(ii, j), 
01906                                                  long_transition_content_scores_pen.get_element(ii, j), 
01907                                                  long_transition_content_scores_prev.get_element(ii, j), 
01908                                                  long_transition_content_scores_elem.get_element(ii, j), 
01909                                                  long_transition_content_scores_loss.get_element(ii, j), 
01910                                                  m_pos[long_transition_content_start_position.get_element(ii,j)], 
01911                                                  m_pos[long_transition_content_end_position.get_element(ii,j)], 
01912                                                  m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
01913                                         SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
01914                                     }
01915                                     
01916                                     if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
01917                                     {
01918                                         SG_ERROR("LOSS: total=%1.1f (%i-%i)  part1=%1.1f/%1.1f (%i-%i)  part2=%1.1f (%i-%i)  sum=%1.1f  diff=%1.1f\n", 
01919                                                  segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t], 
01920                                                  long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
01921                                                  segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t], 
01922                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j), 
01923                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
01924                                     }
01925 #endif
01926                                 }
01927 
01928                                 // prefer simpler version to guarantee optimality
01929                                 // 
01930                                 // original:
01931                                 /* if ((mval < fixedtempvv_) &&
01932                                     (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */
01933                                 if (mval < fixedtempvv_)
01934                                 {
01935                                     /* then the long transition is better than the short one => replace it */ 
01936                                     int32_t fromtjk =  fixedtempii_ ;
01937                                     /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n", 
01938                                       m_pos[t], j, 
01939                                       mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii, 
01940                                       m_pos[long_transition_content_position.get_element(ii, j)], 
01941                                       fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
01942                                     ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
01943 
01944                                     fixedtempvv_ = mval ;
01945                                     fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
01946                                     fixed_list_len = 1 ;
01947                                     fixedtemplong = true ;
01948                                 }
01949 
01950                                 /* // extra check
01951                                    float64_t mval_trans2 = -( elem_val[i] + pen_val_3p + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ;
01952                                    if (last_ts==ts && fabs(last_mval-mval_trans2)>1e-5)
01953                                    SG_PRINT("last_mval=%1.2f at m_pos %i vs. mval_trans2=%1.2f at m_pos %i (diff=%f)\n", last_mval, m_pos[last_ts], mval_trans2, m_pos[ts], last_mval-mval_trans2) ;
01954                                    */
01955                             }
01956                         }
01957                     }
01958 #ifdef DYNPROG_TIMING
01959                     MyTime3.stop() ;
01960                     long_transition_time += MyTime3.time_diff_sec() ;
01961 #endif
01962 
01963 
01964                     int32_t numEnt = fixed_list_len;
01965 
01966                     float64_t minusscore;
01967                     int64_t fromtjk;
01968 
01969                     for (int16_t k=0; k<nbest; k++)
01970                     {
01971                         if (k<numEnt)
01972                         {
01973                             if (nbest==1)
01974                             {
01975                                 minusscore = fixedtempvv_ ;
01976                                 fromtjk = fixedtempii_ ;
01977                             }
01978                             else
01979                             {
01980                                 minusscore = fixedtempvv[k];
01981                                 fromtjk = fixedtempii[k];
01982                             }
01983 
01984                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -minusscore + seq.element(j,t);
01985                             psi.element(t,j,k)      = (fromtjk%m_N) ;
01986                             if (nbest>1)
01987                                 ktable.element(t,j,k)   = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
01988                             ptable.element(t,j,k)   = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
01989                         }
01990                         else
01991                         {
01992                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -CMath::INFTY ;
01993                             psi.element(t,j,k)      = 0 ;
01994                             if (nbest>1)
01995                                 ktable.element(t,j,k)     = 0 ;
01996                             ptable.element(t,j,k)     = 0 ;
01997                         }
01998                     }
01999                 }
02000             }
02001         }
02002         { //termination
02003             int32_t list_len = 0 ;
02004             for (int16_t diff=0; diff<nbest; diff++)
02005             {
02006                 for (T_STATES i=0; i<m_N; i++)
02007                 {
02008                     oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
02009                     oldtempii[list_len] = i + diff*m_N ;
02010                     list_len++ ;
02011                 }
02012             }
02013 
02014             CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
02015 
02016             for (int16_t k=0; k<nbest; k++)
02017             {
02018                 delta_end.element(k) = -oldtempvv[k] ;
02019                 path_ends.element(k) = (oldtempii[k]%m_N) ;
02020                 if (nbest>1)
02021                     ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
02022             }
02023 
02024 
02025         }
02026 
02027         { //state sequence backtracking     
02028             for (int16_t k=0; k<nbest; k++)
02029             {
02030                 prob_nbest[k]= delta_end.element(k) ;
02031 
02032                 int32_t i         = 0 ;
02033                 state_seq[i]  = path_ends.element(k) ;
02034                 int16_t q   = 0 ;
02035                 if (nbest>1)
02036                     q=ktable_end.element(k) ;
02037                 pos_seq[i]    = m_seq_len-1 ;
02038 
02039                 while (pos_seq[i]>0)
02040                 {
02041                     ASSERT(i+1<m_seq_len);
02042                     //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02043                     state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02044                     pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02045                     if (nbest>1)
02046                         q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02047                     i++ ;
02048                 }
02049                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02050                 int32_t num_states = i+1 ;
02051                 for (i=0; i<num_states;i++)
02052                 {
02053                     my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02054                     my_pos_seq[i+k*m_seq_len]   = pos_seq[num_states-i-1] ;
02055                 }
02056                 if (num_states<m_seq_len)
02057                 {
02058                     my_state_seq[num_states+k*m_seq_len]=-1 ;
02059                     my_pos_seq[num_states+k*m_seq_len]=-1 ;
02060                 }
02061             }
02062         }
02063 
02064         //if (is_big)
02065         //  SG_PRINT( "DONE.     \n") ;
02066 
02067 
02068 #ifdef DYNPROG_TIMING
02069         MyTime2.stop() ;
02070 
02071         //if (is_big)
02072         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  content_svm_values_time=%1.2f  content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02073 #endif
02074 
02075         delete[] fixedtempvv ;
02076         delete[] fixedtempii ;
02077     }
02078 
02079 
02080 void CDynProg::best_path_trans_deriv(
02081     int32_t *my_state_seq, int32_t *my_pos_seq,
02082     int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02083 {   
02084     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02085     m_end_state_distribution_q_deriv.resize_array(m_N) ;
02086     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02087     //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
02088     //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
02089     m_my_scores.resize_array(my_seq_len);
02090     m_my_losses.resize_array(my_seq_len);
02091     float64_t* my_scores=m_my_scores.get_array();
02092     float64_t* my_losses=m_my_losses.get_array();
02093     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02094     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02095 
02096     if (!m_svm_arrays_clean)
02097     {
02098         SG_ERROR( "SVM arrays not clean") ;
02099         return ;
02100     } ;
02101     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
02102     //m_mod_words.display() ;
02103     //m_sign_words.display() ;
02104     //m_string_words.display() ;
02105 
02106     bool use_svm = false ;
02107 
02108     CArray2<CPlifBase*> PEN(Plif_matrix, m_N, m_N, false, false) ;
02109    PEN.set_name("PEN");
02110     CArray2<CPlifBase*> PEN_state_signals(Plif_state_signals, m_N, max_num_signals, false, false) ;
02111    PEN_state_signals.set_name("PEN_state_signals");
02112     CArray3<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02113    seq_input.set_name("seq_input");
02114 
02115     { // determine whether to use svm outputs and clear derivatives
02116         for (int32_t i=0; i<m_N; i++)
02117             for (int32_t j=0; j<m_N; j++)
02118             {
02119                 CPlifBase *penij=PEN.element(i,j) ;
02120                 if (penij==NULL)
02121                     continue ;
02122 
02123                 if (penij->uses_svm_values())
02124                     use_svm=true ;
02125                 penij->penalty_clear_derivative() ;
02126             }
02127         for (int32_t i=0; i<m_N; i++)
02128             for (int32_t j=0; j<max_num_signals; j++)
02129             {
02130                 CPlifBase *penij=PEN_state_signals.element(i,j) ;
02131                 if (penij==NULL)
02132                     continue ;
02133                 if (penij->uses_svm_values())
02134                     use_svm=true ;
02135                 penij->penalty_clear_derivative() ;
02136             }
02137     }
02138 
02139     { // set derivatives of p, q and a to zero
02140 
02141         for (int32_t i=0; i<m_N; i++)
02142         {
02143             m_initial_state_distribution_p_deriv.element(i)=0 ;
02144             m_end_state_distribution_q_deriv.element(i)=0 ;
02145             for (int32_t j=0; j<m_N; j++)
02146                 m_transition_matrix_a_deriv.element(i,j)=0 ;
02147         }
02148     }
02149 
02150     { // clear score vector
02151         for (int32_t i=0; i<my_seq_len; i++)
02152         {
02153             my_scores[i]=0.0 ;
02154             my_losses[i]=0.0 ;
02155         }
02156     }
02157 
02158     //int32_t total_len = 0 ;
02159 
02160     //m_transition_matrix_a.display_array() ;
02161     //m_transition_matrix_a_id.display_array() ;
02162 
02163     // compute derivatives for given path
02164     float64_t* svm_value = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02165     float64_t* svm_value_part1 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02166     float64_t* svm_value_part2 = new float64_t[m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs];
02167     for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02168     {
02169         svm_value[s]=0 ;
02170         svm_value_part1[s]=0 ;
02171         svm_value_part2[s]=0 ;
02172     }
02173 
02174     //#ifdef DYNPROG_DEBUG
02175     float64_t total_score = 0.0 ;
02176     float64_t total_loss = 0.0 ;
02177     //#endif        
02178 
02179     ASSERT(my_state_seq[0]>=0) ;
02180     m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02181     my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02182 
02183     ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02184     m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02185     my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02186 
02187     //#ifdef DYNPROG_DEBUG
02188     total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02189     //#endif        
02190 
02191     SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02192     for (int32_t i=0; i<my_seq_len-1; i++)
02193     {
02194         if (my_state_seq[i+1]==-1)
02195             break ;
02196         int32_t from_state = my_state_seq[i] ;
02197         int32_t to_state   = my_state_seq[i+1] ;
02198         int32_t from_pos   = my_pos_seq[i] ;
02199         int32_t to_pos     = my_pos_seq[i+1] ;
02200 
02201         int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02202         my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02203 
02204 #ifdef DYNPROG_DEBUG
02205 
02206 
02207         if (i>0)// test if segment loss is additive
02208         {
02209             float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02210             float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02211             float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02212             SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02213             if (CMath::abs(loss1+loss2-loss3)>0)
02214             {
02215                 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02216             }
02217         }
02218         io->set_loglevel(M_DEBUG) ;
02219         SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02220 #endif
02221         // increase usage of this transition
02222         m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02223         my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02224         //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state));
02225 #ifdef DYNPROG_DEBUG
02226         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02227 #endif
02228 
02229         /*int32_t last_svm_pos[m_num_degrees] ;
02230           for (int32_t qq=0; qq<m_num_degrees; qq++)
02231           last_svm_pos[qq]=-1 ;*/
02232 
02233         bool is_long_transition = false ;
02234         if (m_long_transitions)
02235         {
02236             if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02237                 is_long_transition = true ;
02238             if (m_orf_info.element(from_state,0)!=-1)
02239                 is_long_transition = false ;
02240         }
02241         
02242         int32_t from_pos_thresh = from_pos ;
02243         int32_t to_pos_thresh = to_pos ;
02244 
02245         if (use_svm)
02246         {
02247             if (is_long_transition)
02248             {
02249                 
02250                 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // *
02251                     from_pos_thresh++ ;
02252                 ASSERT(from_pos_thresh<to_pos) ;
02253                 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // *
02254                 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// *
02255 
02256                 int32_t frame = m_orf_info.element(from_state,0);
02257                 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02258 
02259 #ifdef DYNPROG_DEBUG
02260                 SG_PRINT("part1: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02261                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02262                     SG_PRINT("%1.4f  ", svm_value_part1[s]);
02263                 SG_PRINT("\n");
02264 #endif
02265 
02266                 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // *
02267                     to_pos_thresh-- ;
02268                 ASSERT(to_pos_thresh>0) ;
02269                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // *
02270                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // *
02271 
02272                 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02273 
02274 #ifdef DYNPROG_DEBUG
02275                 SG_PRINT("part2: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02276                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02277                     SG_PRINT("%1.4f  ", svm_value_part2[s]);
02278                 SG_PRINT("\n");
02279 #endif
02280             }
02281             else
02282             {
02283                 /* normal case */
02284 
02285                 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]); 
02286                 int32_t frame = m_orf_info.element(from_state,0);
02287                 if (false)//(frame>=0)
02288                 {
02289                     int32_t num_current_svms=0;
02290                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02291                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame);
02292                     PEN.element(to_state, from_state)->get_used_svms(&num_current_svms, svm_ids);
02293                     SG_PRINT("\n");
02294                 }
02295 
02296                 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02297 #ifdef DYNPROG_DEBUG
02298                 SG_PRINT("part2: pos1: %i  pos2: %i   \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02299                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02300                     SG_PRINT("%1.4f  ", svm_value[s]);
02301                 SG_PRINT("\n");
02302 #endif
02303             }
02304         }
02305 
02306         if (PEN.element(to_state, from_state)!=NULL)
02307         {
02308             float64_t nscore = 0 ;
02309             if (is_long_transition)
02310             {
02311                 float64_t pen_value_part1 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02312                 float64_t pen_value_part2 = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02313                 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02314             }
02315             else
02316                 nscore = PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02317             
02318             if (false)//(nscore<-1e9)
02319                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n", 
02320                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
02321     
02322             my_scores[i] += nscore ;
02323 
02324             for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
02325             {
02326                 svm_value[s]=-CMath::INFTY;
02327                 svm_value_part1[s]=-CMath::INFTY;
02328                 svm_value_part2[s]=-CMath::INFTY;
02329             }
02330 
02331 #ifdef DYNPROG_DEBUG
02332             //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ;
02333 #endif
02334             if (is_long_transition)
02335             {
02336 #ifdef DYNPROG_DEBUG
02337                 float64_t sum_score = 0.0 ;
02338 
02339                 for (int kk=0; kk<i; kk++)
02340                     sum_score += my_scores[i] ;
02341 
02342                 SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i)  2: %1.6f (%i-%i) \n", 
02343                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, 
02344                         nscore, sum_score, 
02345                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh], 
02346                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02347 #endif
02348             }
02349 
02350             if (is_long_transition)
02351             {
02352                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02353                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02354             }
02355             else
02356                 PEN.element(to_state, from_state)->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02357 
02358             //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ;
02359 
02360             // for tiling array and rna-seq data every single measurement must be added to the derivative 
02361             // in contrast to the content svm predictions where we have a single value per transition;
02362             // content svm predictions have already been added to the derivative, thus we start with d=1 
02363             // instead of d=0
02364             if (is_long_transition)
02365             {
02366                 for (int32_t d=1; d<=m_num_raw_data; d++) 
02367                 {
02368                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02369                         svm_value[s]=-CMath::INFTY;
02370                     float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02371                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02372                     for (int32_t k=0;k<num_intensities;k++)
02373                     {
02374                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02375                             svm_value[j]=intensities[k];
02376 
02377                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;  
02378 
02379                     }
02380                     num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02381                     for (int32_t k=0;k<num_intensities;k++)
02382                     {
02383                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02384                             svm_value[j]=intensities[k];
02385 
02386                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;  
02387 
02388                     }
02389                     delete[] intensities;
02390 
02391                 }
02392             }
02393             else
02394             {
02395                 for (int32_t d=1; d<=m_num_raw_data; d++) 
02396                 {
02397                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02398                         svm_value[s]=-CMath::INFTY;
02399                     float64_t* intensities = new float64_t[m_num_probes_cum[d]];
02400                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02401                     //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities);
02402                     for (int32_t k=0;k<num_intensities;k++)
02403                     {
02404                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02405                             svm_value[j]=intensities[k];
02406 
02407                         PEN.element(to_state, from_state)->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;    
02408 
02409                     }
02410                     delete[] intensities;
02411                 }
02412             }
02413 
02414         }
02415 #ifdef DYNPROG_DEBUG
02416         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02417 #endif
02418 
02419         //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
02420         for (int32_t k=0; k<max_num_signals; k++)
02421         {
02422             if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02423             {
02424 #ifdef DYNPROG_DEBUG
02425                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02426 #endif
02427                 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02428                 //if (seq_input.element(to_state, to_pos, k) !=0)
02429                 //  SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
02430                 break ;
02431             }
02432             if (PEN_state_signals.element(to_state, k)!=NULL)
02433             {
02434                 float64_t nscore = PEN_state_signals.element(to_state,k)->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter)
02435                 my_scores[i] += nscore ;
02436 #ifdef DYNPROG_DEBUG
02437                 if (false)//(nscore<-1e9)
02438                 {
02439                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n", 
02440                         is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
02441                     for (int x=0; x<23; x++)
02442                     {
02443                         for (int i=-10; i<10; i++)
02444                             SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k));
02445                         SG_PRINT("\n");
02446                     }
02447                     
02448                 }
02449 #endif
02450                 //break ;
02451                 //int32_t num_current_svms=0;
02452                 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02453                 //SG_PRINT("PEN_state_signals->id: ");
02454                 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
02455                 //SG_PRINT("\n");
02456                 //if (nscore != 0)
02457                 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02458 #ifdef DYNPROG_DEBUG
02459                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02460 #endif
02461                 PEN_state_signals.element(to_state,k)->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter)
02462             } else
02463                 break ;
02464         }
02465 
02466         //#ifdef DYNPROG_DEBUG
02467         //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
02468         //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
02469         total_score += my_scores[i] ;
02470         total_loss += my_losses[i] ;
02471         //#endif
02472     }
02473     //#ifdef DYNPROG_DEBUG
02474     //SG_PRINT( "total score = %f \n", total_score) ;
02475     //SG_PRINT( "total loss = %f \n", total_loss) ;
02476     //#endif
02477     delete[] svm_value;
02478     delete[] svm_value_part1 ;
02479     delete[] svm_value_part2 ;
02480 }
02481 
02482 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02483 {
02484     ASSERT(from_pos<to_pos);
02485     int32_t num_intensities = 0;
02486     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[type-1]];
02487     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02488     int32_t last_pos;
02489     int32_t num = m_num_probes_cum[type-1];
02490     while (*p_tiling_pos<to_pos)
02491     {
02492         if (*p_tiling_pos>=from_pos)
02493         {
02494             intensities[num_intensities] = *p_tiling_data;
02495             num_intensities++;
02496         }
02497         num++;
02498         if (num>=m_num_probes_cum[type])
02499             break;
02500         last_pos = *p_tiling_pos;
02501         p_tiling_pos++;
02502         p_tiling_data++;
02503         ASSERT(last_pos<*p_tiling_pos);
02504     }
02505     return num_intensities;
02506 }
02507 
02508 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02509 {
02510 #ifdef DYNPROG_TIMING_DETAIL
02511     MyTime.start() ;
02512 #endif
02513 //  ASSERT(from_state<to_state);
02514 //  if (!(from_pos<to_pos))
02515 //      SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos);
02516     for (int32_t i=0;i<m_num_svms;i++)
02517     {
02518         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02519         float64_t from_val = m_lin_feat.get_element(i, from_state);
02520         svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02521     }
02522     for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02523     {
02524         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02525         float64_t from_val = m_lin_feat.get_element(i, from_state);
02526         svm_values[i] = to_val-from_val ;
02527     }
02528     if (m_intron_list)
02529     {
02530         int32_t* support = new int32_t[m_num_intron_plifs];
02531         m_intron_list->get_intron_support(support, from_state, to_state);
02532         int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02533         int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; 
02534         int32_t cnt = 0;
02535         for (int32_t i=intron_list_start; i<intron_list_end;i++)
02536         {
02537             svm_values[i] = (float64_t) (support[cnt]);
02538             cnt++;
02539         }
02540         //if (to_pos>3990 && to_pos<4010)
02541         //  SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]);
02542         delete[] support;
02543     }
02544     // find the correct row with precomputed frame predictions
02545     if (frame!=-1)
02546     {
02547         svm_values[frame_plifs[0]] = 1e10;
02548         svm_values[frame_plifs[1]] = 1e10;
02549         svm_values[frame_plifs[2]] = 1e10;
02550         int32_t global_frame = from_pos%3;
02551         int32_t row = ((global_frame+frame)%3)+4;
02552         float64_t to_val   = m_lin_feat.get_element(row, to_state);
02553         float64_t from_val = m_lin_feat.get_element(row, from_state);
02554         svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02555     }
02556 #ifdef DYNPROG_TIMING_DETAIL
02557     MyTime.stop() ;
02558     content_svm_values_time += MyTime.time_diff_sec() ;
02559 #endif
02560 }
02561 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02562 {
02563     m_intron_list = intron_list;
02564     m_num_intron_plifs = num_plifs;
02565 }
02566 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation