DynProg.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 2 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2007 Soeren Sonnenburg
00008  * Written (W) 1999-2007 Gunnar Raetsch
00009  * Written (W) 2008-2009 Jonas Behr
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/structure/DynProg.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/io/SGIO.h>
00016 #include <shogun/lib/config.h>
00017 #include <shogun/features/StringFeatures.h>
00018 #include <shogun/features/Alphabet.h>
00019 #include <shogun/structure/Plif.h>
00020 #include <shogun/structure/IntronList.h>
00021 
00022 #include <stdlib.h>
00023 #include <stdio.h>
00024 #include <time.h>
00025 #include <ctype.h>
00026 
00027 using namespace shogun;
00028 
00029 //#define USE_TMP_ARRAYCLASS
00030 //#define DYNPROG_DEBUG
00031 
00032 int32_t CDynProg::word_degree_default[4]={3,4,5,6} ;
00033 int32_t CDynProg::cum_num_words_default[5]={0,64,320,1344,5440} ;
00034 int32_t CDynProg::frame_plifs[3]={4,5,6};
00035 int32_t CDynProg::num_words_default[4]=   {64,256,1024,4096} ;
00036 int32_t CDynProg::mod_words_default[32] = {1,1,1,1,1,1,1,1,
00037                                     1,1,1,1,1,1,1,1,
00038                                     0,0,0,0,0,0,0,0,
00039                                     0,0,0,0,0,0,0,0} ;
00040 bool CDynProg::sign_words_default[16] = {true,true,true,true,true,true,true,true,
00041                                       false,false,false,false,false,false,false,false} ; // whether to use counts or signum of counts
00042 int32_t CDynProg::string_words_default[16] = {0,0,0,0,0,0,0,0,
00043                                        1,1,1,1,1,1,1,1} ; // which string should be used
00044 
00045 CDynProg::CDynProg(int32_t num_svms /*= 8 */)
00046 : CSGObject(), m_transition_matrix_a_id(1,1), m_transition_matrix_a(1,1),
00047     m_transition_matrix_a_deriv(1,1), m_initial_state_distribution_p(1),
00048     m_initial_state_distribution_p_deriv(1), m_end_state_distribution_q(1),
00049     m_end_state_distribution_q_deriv(1),
00050 
00051       // multi svm
00052       m_num_degrees(4),
00053       m_num_svms(num_svms),
00054       m_word_degree(word_degree_default, m_num_degrees, true, true),
00055       m_cum_num_words(cum_num_words_default, m_num_degrees+1, true, true),
00056       m_cum_num_words_array(m_cum_num_words.get_array()),
00057       m_num_words(num_words_default, m_num_degrees, true, true),
00058       m_num_words_array(m_num_words.get_array()),
00059       m_mod_words(mod_words_default, m_num_svms, 2, true, true),
00060       m_mod_words_array(m_mod_words.get_array()),
00061       m_sign_words(sign_words_default, m_num_svms, true, true),
00062       m_sign_words_array(m_sign_words.get_array()),
00063       m_string_words(string_words_default, m_num_svms, true, true),
00064       m_string_words_array(m_string_words.get_array()),
00065       //m_svm_pos_start(m_num_degrees),
00066       m_num_unique_words(m_num_degrees),
00067       m_svm_arrays_clean(true),
00068 
00069       m_max_a_id(0), m_observation_matrix(1,1,1),
00070       m_pos(1),
00071       m_seq_len(0),
00072       m_orf_info(1,2),
00073       m_plif_list(1),
00074       m_PEN(1,1),
00075       m_genestr(1), m_wordstr(NULL), m_dict_weights(1,1), m_segment_loss(1,1,2),
00076       m_segment_ids(1),
00077       m_segment_mask(1),
00078       m_my_state_seq(1),
00079       m_my_pos_seq(1),
00080       m_my_scores(1),
00081       m_my_losses(1),
00082       m_scores(1),
00083       m_states(1,1),
00084       m_positions(1,1),
00085 
00086       m_seq_sparse1(NULL),
00087       m_seq_sparse2(NULL),
00088       m_plif_matrices(NULL),
00089 
00090       m_genestr_stop(1),
00091       m_intron_list(NULL),
00092       m_num_intron_plifs(0),
00093       m_lin_feat(1,1), //by Jonas
00094       m_raw_intensities(NULL),
00095       m_probe_pos(NULL),
00096       m_num_probes_cum(NULL),
00097       m_num_lin_feat_plifs_cum(NULL),
00098       m_num_raw_data(0),
00099 
00100       m_long_transitions(true),
00101       m_long_transition_threshold(1000)
00102 {
00103     trans_list_forward = NULL ;
00104     trans_list_forward_cnt = NULL ;
00105     trans_list_forward_val = NULL ;
00106     trans_list_forward_id = NULL ;
00107     trans_list_len = 0 ;
00108 
00109     mem_initialized = true ;
00110 
00111     m_N=1;
00112 
00113     m_raw_intensities = NULL;
00114     m_probe_pos = NULL;
00115     m_num_probes_cum = SG_MALLOC(int32_t, 100);
00116     m_num_probes_cum[0] = 0;
00117     //m_use_tiling=false;
00118     m_num_lin_feat_plifs_cum = SG_MALLOC(int32_t, 100);
00119     m_num_lin_feat_plifs_cum[0] = m_num_svms;
00120     m_num_raw_data = 0;
00121 #ifdef ARRAY_STATISTICS
00122     m_word_degree.set_array_name("word_degree");
00123 #endif
00124 
00125     m_transition_matrix_a_id.set_array_name("transition_matrix_a_id");
00126     m_transition_matrix_a.set_array_name("transition_matrix_a");
00127     m_transition_matrix_a_deriv.set_array_name("transition_matrix_a_deriv");
00128     m_mod_words.set_array_name("mod_words");
00129     m_orf_info.set_array_name("orf_info");
00130     m_segment_sum_weights.set_array_name("segment_sum_weights");
00131     m_PEN.set_array_name("PEN");
00132     m_PEN_state_signals.set_array_name("PEN_state_signals");
00133     m_dict_weights.set_array_name("dict_weights");
00134     m_states.set_array_name("states");
00135     m_positions.set_array_name("positions");
00136     m_lin_feat.set_array_name("lin_feat");
00137 
00138 
00139     m_observation_matrix.set_array_name("m_observation_matrix");
00140     m_segment_loss.set_array_name("m_segment_loss");
00141     m_seg_loss_obj = new CSegmentLoss();
00142 }
00143 
00144 CDynProg::~CDynProg()
00145 {
00146     if (trans_list_forward_cnt)
00147         SG_FREE(trans_list_forward_cnt);
00148     if (trans_list_forward)
00149     {
00150         for (int32_t i=0; i<trans_list_len; i++)
00151         {
00152             if (trans_list_forward[i])
00153                 SG_FREE(trans_list_forward[i]);
00154         }
00155         SG_FREE(trans_list_forward);
00156     }
00157     if (trans_list_forward_val)
00158     {
00159         for (int32_t i=0; i<trans_list_len; i++)
00160         {
00161             if (trans_list_forward_val[i])
00162                 SG_FREE(trans_list_forward_val[i]);
00163         }
00164         SG_FREE(trans_list_forward_val);
00165     }
00166     if (trans_list_forward_id)
00167     {
00168         for (int32_t i=0; i<trans_list_len; i++)
00169         {
00170             if (trans_list_forward_id[i])
00171                 SG_FREE(trans_list_forward_id[i]);
00172         }
00173         SG_FREE(trans_list_forward_id);
00174     }
00175     if (m_raw_intensities)
00176         SG_FREE(m_raw_intensities);
00177     if (m_probe_pos)
00178         SG_FREE(m_probe_pos);
00179     if (m_num_probes_cum)
00180       SG_FREE(m_num_probes_cum);
00181     if (m_num_lin_feat_plifs_cum)
00182       SG_FREE(m_num_lin_feat_plifs_cum);
00183 
00184     delete m_intron_list;
00185 
00186     SG_UNREF(m_seq_sparse1);
00187     SG_UNREF(m_seq_sparse2);
00188     SG_UNREF(m_plif_matrices);
00189 }
00190 
00192 int32_t CDynProg::get_num_svms()
00193 {
00194     return m_num_svms;
00195 }
00196 
00197 void CDynProg::precompute_stop_codons()
00198 {
00199     int32_t length=m_genestr.get_dim1();
00200 
00201     m_genestr_stop.resize_array(length) ;
00202     m_genestr_stop.set_const(0) ;
00203     m_genestr_stop.set_array_name("genestr_stop") ;
00204     {
00205         for (int32_t i=0; i<length-2; i++)
00206             if ((m_genestr[i]=='t' || m_genestr[i]=='T') &&
00207                     (((m_genestr[i+1]=='a' || m_genestr[i+1]=='A') &&
00208                       (m_genestr[i+2]=='a' || m_genestr[i+2]=='g' || m_genestr[i+2]=='A' || m_genestr[i+2]=='G')) ||
00209                      ((m_genestr[i+1]=='g'||m_genestr[i+1]=='G') && (m_genestr[i+2]=='a' || m_genestr[i+2]=='A') )))
00210             {
00211                 m_genestr_stop.element(i)=true ;
00212             }
00213             else
00214                 m_genestr_stop.element(i)=false ;
00215         m_genestr_stop.element(length-2)=false ;
00216         m_genestr_stop.element(length-1)=false ;
00217     }
00218 }
00219 
00220 void CDynProg::set_num_states(int32_t p_N)
00221 {
00222     m_N=p_N ;
00223 
00224     m_transition_matrix_a_id.resize_array(m_N,m_N) ;
00225     m_transition_matrix_a.resize_array(m_N,m_N) ;
00226     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
00227     m_initial_state_distribution_p.resize_array(m_N) ;
00228     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
00229     m_end_state_distribution_q.resize_array(m_N);
00230     m_end_state_distribution_q_deriv.resize_array(m_N) ;
00231 
00232     m_orf_info.resize_array(m_N,2) ;
00233     m_PEN.resize_array(m_N,m_N) ;
00234 }
00235 
00236 int32_t CDynProg::get_num_states()
00237 {
00238     return m_N;
00239 }
00240 
00241 void CDynProg::init_tiling_data(
00242     int32_t* probe_pos, float64_t* intensities, const int32_t num_probes)
00243 {
00244     m_num_raw_data++;
00245     m_num_probes_cum[m_num_raw_data] = m_num_probes_cum[m_num_raw_data-1]+num_probes;
00246 
00247     int32_t* tmp_probe_pos = SG_MALLOC(int32_t, m_num_probes_cum[m_num_raw_data]);
00248     float64_t* tmp_raw_intensities = SG_MALLOC(float64_t, m_num_probes_cum[m_num_raw_data]);
00249 
00250 
00251     if (m_num_raw_data==1){
00252         memcpy(tmp_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00253         memcpy(tmp_raw_intensities, intensities, num_probes*sizeof(float64_t));
00254         //SG_PRINT("raw_intens:%f \n",*tmp_raw_intensities+2);
00255     }else{
00256         memcpy(tmp_probe_pos, m_probe_pos, m_num_probes_cum[m_num_raw_data-1]*sizeof(int32_t));
00257         memcpy(tmp_raw_intensities, m_raw_intensities, m_num_probes_cum[m_num_raw_data-1]*sizeof(float64_t));
00258         memcpy(tmp_probe_pos+m_num_probes_cum[m_num_raw_data-1], probe_pos, num_probes*sizeof(int32_t));
00259         memcpy(tmp_raw_intensities+m_num_probes_cum[m_num_raw_data-1], intensities, num_probes*sizeof(float64_t));
00260     }
00261     SG_FREE(m_probe_pos);
00262     SG_FREE(m_raw_intensities);
00263     m_probe_pos = tmp_probe_pos; //SG_MALLOC(int32_t, num_probes);
00264     m_raw_intensities = tmp_raw_intensities;//SG_MALLOC(float64_t, num_probes);
00265 
00266     //memcpy(m_probe_pos, probe_pos, num_probes*sizeof(int32_t));
00267     //memcpy(m_raw_intensities, intensities, num_probes*sizeof(float64_t));
00268 
00269 }
00270 
00271 void CDynProg::init_content_svm_value_array(const int32_t p_num_svms)
00272 {
00273     m_lin_feat.resize_array(p_num_svms, m_seq_len);
00274 
00275     // initialize array
00276     for (int s=0; s<p_num_svms; s++)
00277       for (int p=0; p<m_seq_len; p++)
00278         m_lin_feat.set_element(0.0, s, p) ;
00279 }
00280 
00281 void CDynProg::resize_lin_feat(const int32_t num_new_feat)
00282 {
00283     int32_t dim1, dim2;
00284     m_lin_feat.get_array_size(dim1, dim2);
00285     ASSERT(dim1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]);
00286     ASSERT(dim2==m_seq_len); // == number of candidate positions
00287 
00288 
00289 
00290     float64_t* arr = m_lin_feat.get_array();
00291     float64_t* tmp = SG_MALLOC(float64_t, (dim1+num_new_feat)*dim2);
00292     memset(tmp, 0, (dim1+num_new_feat)*dim2*sizeof(float64_t)) ;
00293     for(int32_t j=0;j<m_seq_len;j++)
00294                 for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data-1];k++)
00295             tmp[j*(dim1+num_new_feat)+k] = arr[j*dim1+k];
00296 
00297     m_lin_feat.set_array(tmp, dim1+num_new_feat,dim2, true, true);// copy array and free it later
00298     SG_FREE(tmp);
00299 
00300     /*for(int32_t j=0;j<5;j++)
00301     {
00302         for(int32_t k=0;k<m_num_lin_feat_plifs_cum[m_num_raw_data];k++)
00303         {
00304             SG_PRINT("(%i,%i)%f ",k,j,m_lin_feat.get_element(k,j));
00305         }
00306         SG_PRINT("\n");
00307     }
00308     m_lin_feat.get_array_size(dim1,dim2);
00309     SG_PRINT("resize_lin_feat: dim1:%i, dim2:%i\n",dim1,dim2);*/
00310 
00311     //SG_PRINT("resize_lin_feat: done\n");
00312 }
00313 
00314 void CDynProg::precompute_tiling_plifs(
00315     CPlif** PEN, const int32_t* tiling_plif_ids, const int32_t num_tiling_plifs)
00316 {
00317     m_num_lin_feat_plifs_cum[m_num_raw_data] = m_num_lin_feat_plifs_cum[m_num_raw_data-1]+ num_tiling_plifs;
00318     float64_t* tiling_plif = SG_MALLOC(float64_t, num_tiling_plifs);
00319     float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
00320     for (int32_t i=0; i<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; i++)
00321         svm_value[i]=0.0;
00322     int32_t* tiling_rows = SG_MALLOC(int32_t, num_tiling_plifs);
00323     for (int32_t i=0; i<num_tiling_plifs; i++)
00324     {
00325         tiling_plif[i]=0.0;
00326         CPlif * plif = PEN[tiling_plif_ids[i]];
00327         tiling_rows[i] = plif->get_use_svm();
00328 
00329         ASSERT(tiling_rows[i]-1==m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i)
00330     }
00331     resize_lin_feat(num_tiling_plifs);
00332 
00333 
00334     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[m_num_raw_data-1]];
00335     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[m_num_raw_data-1]];
00336     int32_t num=m_num_probes_cum[m_num_raw_data-1];
00337 
00338     for (int32_t pos_idx=0;pos_idx<m_seq_len;pos_idx++)
00339     {
00340         while (num<m_num_probes_cum[m_num_raw_data]&&*p_tiling_pos<m_pos[pos_idx])
00341         {
00342             for (int32_t i=0; i<num_tiling_plifs; i++)
00343             {
00344                 svm_value[m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i]=*p_tiling_data;
00345                 CPlif * plif = PEN[tiling_plif_ids[i]];
00346                 ASSERT(m_num_lin_feat_plifs_cum[m_num_raw_data-1]+i==plif->get_use_svm()-1);
00347                 plif->set_do_calc(true);
00348                 tiling_plif[i]+=plif->lookup_penalty(0,svm_value);
00349                 plif->set_do_calc(false);
00350             }
00351             p_tiling_data++;
00352             p_tiling_pos++;
00353             num++;
00354         }
00355         for (int32_t i=0; i<num_tiling_plifs; i++)
00356             m_lin_feat.set_element(tiling_plif[i],tiling_rows[i]-1,pos_idx);
00357     }
00358     SG_FREE(svm_value);
00359     SG_FREE(tiling_plif);
00360     SG_FREE(tiling_rows);
00361 }
00362 
00363 void CDynProg::create_word_string()
00364 {
00365     SG_FREE(m_wordstr);
00366     m_wordstr=SG_MALLOC(uint16_t**, 5440);
00367     int32_t k=0;
00368     int32_t genestr_len=m_genestr.get_dim1();
00369 
00370     m_wordstr[k]=SG_MALLOC(uint16_t*, m_num_degrees);
00371     for (int32_t j=0; j<m_num_degrees; j++)
00372     {
00373         m_wordstr[k][j]=NULL ;
00374         {
00375             m_wordstr[k][j]=SG_MALLOC(uint16_t, genestr_len);
00376             for (int32_t i=0; i<genestr_len; i++)
00377                 switch (m_genestr[i])
00378                 {
00379                     case 'A':
00380                     case 'a': m_wordstr[k][j][i]=0 ; break ;
00381                     case 'C':
00382                     case 'c': m_wordstr[k][j][i]=1 ; break ;
00383                     case 'G':
00384                     case 'g': m_wordstr[k][j][i]=2 ; break ;
00385                     case 'T':
00386                     case 't': m_wordstr[k][j][i]=3 ; break ;
00387                     default: ASSERT(0) ;
00388                 }
00389             CAlphabet::translate_from_single_order(m_wordstr[k][j], genestr_len, m_word_degree[j]-1, m_word_degree[j], 2) ;
00390         }
00391     }
00392 }
00393 
00394 void CDynProg::precompute_content_values()
00395 {
00396     for (int32_t s=0; s<m_num_svms; s++)
00397       m_lin_feat.set_element(0.0, s, 0);
00398 
00399     for (int32_t p=0 ; p<m_seq_len-1 ; p++)
00400     {
00401         int32_t from_pos = m_pos[p];
00402         int32_t to_pos = m_pos[p+1];
00403         float64_t* my_svm_values_unnormalized = SG_MALLOC(float64_t, m_num_svms);
00404         //SG_PRINT("%i(%i->%i) ",p,from_pos, to_pos);
00405 
00406         ASSERT(from_pos<=m_genestr.get_dim1());
00407         ASSERT(to_pos<=m_genestr.get_dim1());
00408 
00409         for (int32_t s=0; s<m_num_svms; s++)
00410             my_svm_values_unnormalized[s]=0.0;//precomputed_svm_values.element(s,p);
00411 
00412         for (int32_t i=from_pos; i<to_pos; i++)
00413         {
00414             for (int32_t j=0; j<m_num_degrees; j++)
00415             {
00416                 uint16_t word = m_wordstr[0][j][i] ;
00417                 for (int32_t s=0; s<m_num_svms; s++)
00418                 {
00419                     // check if this k-mer should be considered for this SVM
00420                     if (m_mod_words.get_element(s,0)==3 && i%3!=m_mod_words.get_element(s,1))
00421                         continue;
00422                     my_svm_values_unnormalized[s] += m_dict_weights[(word+m_cum_num_words_array[j])+s*m_cum_num_words_array[m_num_degrees]] ;
00423                 }
00424             }
00425         }
00426         for (int32_t s=0; s<m_num_svms; s++)
00427         {
00428             float64_t prev = m_lin_feat.get_element(s, p);
00429             //SG_PRINT("elem (%i, %i, %f)\n", s, p, prev) ;
00430             if (prev<-1e20 || prev>1e20)
00431             {
00432                 SG_ERROR("initialization missing (%i, %i, %f)\n", s, p, prev) ;
00433                 prev=0 ;
00434             }
00435             m_lin_feat.set_element(prev + my_svm_values_unnormalized[s], s, p+1);
00436         }
00437         SG_FREE(my_svm_values_unnormalized);
00438     }
00439     //for (int32_t j=0; j<m_num_degrees; j++)
00440     //  SG_FREE(m_wordstr[0][j]);
00441     //SG_FREE(m_wordstr[0]);
00442 }
00443 
00444 void CDynProg::set_p_vector(SGVector<float64_t> p)
00445 {
00446     if (!(p.vlen==m_N))
00447         SG_ERROR("length of start prob vector p (%i) is not equal to the number of states (%i), N: %i\n",p.vlen, m_N);
00448 
00449     m_initial_state_distribution_p.set_array(p.vector, p.vlen, true, true);
00450 }
00451 
00452 void CDynProg::set_q_vector(SGVector<float64_t> q)
00453 {
00454     if (!(q.vlen==m_N))
00455         SG_ERROR("length of end prob vector q (%i) is not equal to the number of states (%i), N: %i\n",q.vlen, m_N);
00456     m_end_state_distribution_q.set_array(q.vector, q.vlen, true, true);
00457 }
00458 
00459 void CDynProg::set_a(SGMatrix<float64_t> a)
00460 {
00461     ASSERT(a.num_cols==m_N);
00462     ASSERT(a.num_rows==m_N);
00463     m_transition_matrix_a.set_array(a.matrix, m_N, m_N, true, true);
00464     m_transition_matrix_a_deriv.resize_array(m_N, m_N);
00465 }
00466 
00467 void CDynProg::set_a_id(SGMatrix<int32_t> a)
00468 {
00469     ASSERT(a.num_cols==m_N);
00470     ASSERT(a.num_rows==m_N);
00471     m_transition_matrix_a_id.set_array(a.matrix, m_N, m_N, true, true);
00472     m_max_a_id = 0;
00473     for (int32_t i=0; i<m_N; i++)
00474     {
00475         for (int32_t j=0; j<m_N; j++)
00476             m_max_a_id=CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j));
00477     }
00478 }
00479 
00480 void CDynProg::set_a_trans_matrix(SGMatrix<float64_t> a_trans)
00481 {
00482     int32_t num_trans=a_trans.num_rows;
00483     int32_t num_cols=a_trans.num_cols;
00484 
00485     //CMath::display_matrix(a_trans.matrix,num_trans, num_cols,"a_trans");
00486 
00487     if (!((num_cols==3) || (num_cols==4)))
00488         SG_ERROR("!((num_cols==3) || (num_cols==4)), num_cols: %i\n",num_cols);
00489 
00490     SG_FREE(trans_list_forward);
00491     SG_FREE(trans_list_forward_cnt);
00492     SG_FREE(trans_list_forward_val);
00493     SG_FREE(trans_list_forward_id);
00494 
00495     trans_list_forward = NULL ;
00496     trans_list_forward_cnt = NULL ;
00497     trans_list_forward_val = NULL ;
00498     trans_list_len = 0 ;
00499 
00500     m_transition_matrix_a.set_const(0) ;
00501     m_transition_matrix_a_id.set_const(0) ;
00502 
00503     mem_initialized = true ;
00504 
00505     trans_list_forward_cnt=NULL ;
00506     trans_list_len = m_N ;
00507     trans_list_forward = SG_MALLOC(T_STATES*, m_N);
00508     trans_list_forward_cnt = SG_MALLOC(T_STATES, m_N);
00509     trans_list_forward_val = SG_MALLOC(float64_t*, m_N);
00510     trans_list_forward_id = SG_MALLOC(int32_t*, m_N);
00511 
00512     int32_t start_idx=0;
00513     for (int32_t j=0; j<m_N; j++)
00514     {
00515         int32_t old_start_idx=start_idx;
00516 
00517         while (start_idx<num_trans && a_trans.matrix[start_idx+num_trans]==j)
00518         {
00519             start_idx++;
00520 
00521             if (start_idx>1 && start_idx<num_trans)
00522                 ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00523         }
00524 
00525         if (start_idx>1 && start_idx<num_trans)
00526             ASSERT(a_trans.matrix[start_idx+num_trans-1] <= a_trans.matrix[start_idx+num_trans]);
00527 
00528         int32_t len=start_idx-old_start_idx;
00529         ASSERT(len>=0);
00530 
00531         trans_list_forward_cnt[j] = 0 ;
00532 
00533         if (len>0)
00534         {
00535             trans_list_forward[j]     = SG_MALLOC(T_STATES, len);
00536             trans_list_forward_val[j] = SG_MALLOC(float64_t, len);
00537             trans_list_forward_id[j] = SG_MALLOC(int32_t, len);
00538         }
00539         else
00540         {
00541             trans_list_forward[j]     = NULL;
00542             trans_list_forward_val[j] = NULL;
00543             trans_list_forward_id[j]  = NULL;
00544         }
00545     }
00546 
00547     for (int32_t i=0; i<num_trans; i++)
00548     {
00549         int32_t from_state   = (int32_t)a_trans.matrix[i] ;
00550         int32_t to_state = (int32_t)a_trans.matrix[i+num_trans] ;
00551         float64_t val = a_trans.matrix[i+num_trans*2] ;
00552         int32_t id = 0 ;
00553         if (num_cols==4)
00554             id = (int32_t)a_trans.matrix[i+num_trans*3] ;
00555         //SG_DEBUG( "id=%i\n", id) ;
00556 
00557         ASSERT(to_state>=0 && to_state<m_N);
00558         ASSERT(from_state>=0 && from_state<m_N);
00559 
00560         trans_list_forward[to_state][trans_list_forward_cnt[to_state]]=from_state ;
00561         trans_list_forward_val[to_state][trans_list_forward_cnt[to_state]]=val ;
00562         trans_list_forward_id[to_state][trans_list_forward_cnt[to_state]]=id ;
00563         trans_list_forward_cnt[to_state]++ ;
00564         m_transition_matrix_a.element(from_state, to_state) = val ;
00565         m_transition_matrix_a_id.element(from_state, to_state) = id ;
00566         //SG_PRINT("from_state:%i to_state:%i trans_matrix_a_id:%i \n",from_state, to_state,m_transition_matrix_a_id.element(from_state, to_state));
00567     } ;
00568 
00569     m_max_a_id = 0 ;
00570     for (int32_t i=0; i<m_N; i++)
00571         for (int32_t j=0; j<m_N; j++)
00572         {
00573             //if (m_transition_matrix_a_id.element(i,j))
00574             //SG_DEBUG( "(%i,%i)=%i\n", i,j, m_transition_matrix_a_id.element(i,j)) ;
00575             m_max_a_id = CMath::max(m_max_a_id, m_transition_matrix_a_id.element(i,j)) ;
00576         }
00577     //SG_DEBUG( "m_max_a_id=%i\n", m_max_a_id) ;
00578 }
00579 
00580 
00581 void CDynProg::init_mod_words_array(SGMatrix<int32_t> mod_words_input)
00582 {
00583     //for (int32_t i=0; i<mod_words_input.num_cols; i++)
00584     //{
00585     //  for (int32_t j=0; j<mod_words_input.num_rows; j++)
00586     //      SG_PRINT("%i ",mod_words_input[i*mod_words_input.num_rows+j]);
00587     //  SG_PRINT("\n");
00588     //}
00589     m_svm_arrays_clean=false ;
00590 
00591     ASSERT(m_num_svms==mod_words_input.num_rows);
00592     ASSERT(mod_words_input.num_cols==2);
00593 
00594     m_mod_words.set_array(mod_words_input.matrix, mod_words_input.num_rows, 2, true, true) ;
00595     m_mod_words_array = m_mod_words.get_array() ;
00596 
00597     /*SG_DEBUG( "m_mod_words=[") ;
00598     for (int32_t i=0; i<mod_words_input.num_rows; i++)
00599         SG_DEBUG( "%i, ", p_mod_words_array[i]) ;
00600         SG_DEBUG( "]\n") ;*/
00601 }
00602 
00603 bool CDynProg::check_svm_arrays()
00604 {
00605     //SG_DEBUG( "wd_dim1=%d, m_cum_num_words=%d, m_num_words=%d, m_svm_pos_start=%d, num_uniq_w=%d, mod_words_dims=(%d,%d), sign_w=%d,string_w=%d\n m_num_degrees=%d, m_num_svms=%d, m_num_strings=%d", m_word_degree.get_dim1(), m_cum_num_words.get_dim1(), m_num_words.get_dim1(), m_svm_pos_start.get_dim1(), m_num_unique_words.get_dim1(), m_mod_words.get_dim1(), m_mod_words.get_dim2(), m_sign_words.get_dim1(), m_string_words.get_dim1(), m_num_degrees, m_num_svms, m_num_strings);
00606     if ((m_word_degree.get_dim1()==m_num_degrees) &&
00607             (m_cum_num_words.get_dim1()==m_num_degrees+1) &&
00608             (m_num_words.get_dim1()==m_num_degrees) &&
00609             //(word_used.get_dim1()==m_num_degrees) &&
00610             //(word_used.get_dim2()==m_num_words[m_num_degrees-1]) &&
00611             //(word_used.get_dim3()==m_num_strings) &&
00612             //      (svm_values_unnormalized.get_dim1()==m_num_degrees) &&
00613             //      (svm_values_unnormalized.get_dim2()==m_num_svms) &&
00614             //(m_svm_pos_start.get_dim1()==m_num_degrees) &&
00615             (m_num_unique_words.get_dim1()==m_num_degrees) &&
00616             (m_mod_words.get_dim1()==m_num_svms) &&
00617             (m_mod_words.get_dim2()==2) &&
00618             (m_sign_words.get_dim1()==m_num_svms) &&
00619             (m_string_words.get_dim1()==m_num_svms))
00620     {
00621         m_svm_arrays_clean=true ;
00622         return true ;
00623     }
00624     else
00625     {
00626         if ((m_num_unique_words.get_dim1()==m_num_degrees) &&
00627             (m_mod_words.get_dim1()==m_num_svms) &&
00628             (m_mod_words.get_dim2()==2) &&
00629             (m_sign_words.get_dim1()==m_num_svms) &&
00630             (m_string_words.get_dim1()==m_num_svms))
00631             SG_PRINT("OK\n") ;
00632         else
00633             SG_PRINT("not OK\n") ;
00634 
00635         if (!(m_word_degree.get_dim1()==m_num_degrees))
00636             SG_WARNING("SVM array: word_degree.get_dim1()!=m_num_degrees") ;
00637         if (!(m_cum_num_words.get_dim1()==m_num_degrees+1))
00638             SG_WARNING("SVM array: m_cum_num_words.get_dim1()!=m_num_degrees+1") ;
00639         if (!(m_num_words.get_dim1()==m_num_degrees))
00640             SG_WARNING("SVM array: m_num_words.get_dim1()==m_num_degrees") ;
00641         //if (!(m_svm_pos_start.get_dim1()==m_num_degrees))
00642         //  SG_WARNING("SVM array: m_svm_pos_start.get_dim1()!=m_num_degrees") ;
00643         if (!(m_num_unique_words.get_dim1()==m_num_degrees))
00644             SG_WARNING("SVM array: m_num_unique_words.get_dim1()!=m_num_degrees") ;
00645         if (!(m_mod_words.get_dim1()==m_num_svms))
00646             SG_WARNING("SVM array: m_mod_words.get_dim1()!=num_svms") ;
00647         if (!(m_mod_words.get_dim2()==2))
00648             SG_WARNING("SVM array: m_mod_words.get_dim2()!=2") ;
00649         if (!(m_sign_words.get_dim1()==m_num_svms))
00650             SG_WARNING("SVM array: m_sign_words.get_dim1()!=num_svms") ;
00651         if (!(m_string_words.get_dim1()==m_num_svms))
00652             SG_WARNING("SVM array: m_string_words.get_dim1()!=num_svms") ;
00653 
00654         m_svm_arrays_clean=false ;
00655         return false ;
00656     }
00657 }
00658 
00659 void CDynProg::set_observation_matrix(SGNDArray<float64_t> seq)
00660 {
00661     if (seq.num_dims!=3)
00662         SG_ERROR("Expected 3-dimensional Matrix\n");
00663 
00664     int32_t N=seq.dims[0];
00665     int32_t cand_pos=seq.dims[1];
00666     int32_t max_num_features=seq.dims[2];
00667 
00668     if (!m_svm_arrays_clean)
00669     {
00670         SG_ERROR( "SVM arrays not clean") ;
00671         return ;
00672     } ;
00673 
00674     ASSERT(N==m_N);
00675     ASSERT(cand_pos==m_seq_len);
00676     ASSERT(m_initial_state_distribution_p.get_dim1()==N);
00677     ASSERT(m_end_state_distribution_q.get_dim1()==N);
00678 
00679     m_observation_matrix.set_array(seq.array, N, m_seq_len, max_num_features, true, true) ;
00680 }
00681 int32_t CDynProg::get_num_positions()
00682 {
00683     return m_seq_len;
00684 }
00685 
00686 void CDynProg::set_content_type_array(SGMatrix<float64_t> seg_path)
00687 {
00688     ASSERT(seg_path.num_rows==2);
00689     ASSERT(seg_path.num_cols==m_seq_len);
00690 
00691     if (seg_path.matrix!=NULL)
00692     {
00693         int32_t *segment_ids = SG_MALLOC(int32_t, m_seq_len);
00694         float64_t *segment_mask = SG_MALLOC(float64_t, m_seq_len);
00695         for (int32_t i=0; i<m_seq_len; i++)
00696         {
00697                 segment_ids[i] = (int32_t)seg_path.matrix[2*i] ;
00698                 segment_mask[i] = seg_path.matrix[2*i+1] ;
00699         }
00700         best_path_set_segment_ids_mask(segment_ids, segment_mask, m_seq_len) ;
00701         SG_FREE(segment_ids);
00702         SG_FREE(segment_mask);
00703     }
00704     else
00705     {
00706         int32_t *izeros = SG_MALLOC(int32_t, m_seq_len);
00707         float64_t *dzeros = SG_MALLOC(float64_t, m_seq_len);
00708         for (int32_t i=0; i<m_seq_len; i++)
00709         {
00710             izeros[i]=0 ;
00711             dzeros[i]=0.0 ;
00712         }
00713         best_path_set_segment_ids_mask(izeros, dzeros, m_seq_len) ;
00714         SG_FREE(izeros);
00715         SG_FREE(dzeros);
00716     }
00717 }
00718 
00719 void CDynProg::set_pos(SGVector<int32_t> pos)
00720 {
00721     m_pos.set_array(pos.vector, pos.vlen, true, true) ;
00722     m_seq_len = pos.vlen;
00723 }
00724 
00725 void CDynProg::set_orf_info(SGMatrix<int32_t> orf_info)
00726 {
00727     if (orf_info.num_cols!=2)
00728         SG_ERROR( "orf_info size incorrect %i!=2\n", orf_info.num_cols) ;
00729 
00730     m_orf_info.set_array(orf_info.matrix, orf_info.num_rows, orf_info.num_cols, true, true) ;
00731     m_orf_info.set_array_name("orf_info") ;
00732 }
00733 
00734 void CDynProg::set_sparse_features(CSparseFeatures<float64_t>* seq_sparse1, CSparseFeatures<float64_t>* seq_sparse2)
00735 {
00736     if ((!seq_sparse1 && seq_sparse2) || (seq_sparse1 && !seq_sparse2))
00737         SG_ERROR("Sparse features must either both be NULL or both NON-NULL\n");
00738 
00739     SG_UNREF(m_seq_sparse1);
00740     SG_UNREF(m_seq_sparse2);
00741 
00742     m_seq_sparse1=seq_sparse1;
00743     m_seq_sparse2=seq_sparse2;
00744     SG_REF(m_seq_sparse1);
00745     SG_REF(m_seq_sparse2);
00746 }
00747 
00748 void CDynProg::set_plif_matrices(CPlifMatrix* pm)
00749 {
00750     SG_UNREF(m_plif_matrices);
00751 
00752     m_plif_matrices=pm;
00753 
00754     SG_REF(m_plif_matrices);
00755 }
00756 
00757 void CDynProg::set_gene_string(SGVector<char> genestr)
00758 {
00759     ASSERT(genestr.vector);
00760     ASSERT(genestr.vlen>0);
00761 
00762     m_genestr.set_array(genestr.vector, genestr.vlen, true, true) ;
00763 }
00764 
00765 void CDynProg::set_my_state_seq(int32_t* my_state_seq)
00766 {
00767     ASSERT(my_state_seq && m_seq_len>0);
00768     m_my_state_seq.resize_array(m_seq_len);
00769     for (int32_t i=0; i<m_seq_len; i++)
00770         m_my_state_seq[i]=my_state_seq[i];
00771 }
00772 
00773 void CDynProg::set_my_pos_seq(int32_t* my_pos_seq)
00774 {
00775     ASSERT(my_pos_seq && m_seq_len>0);
00776     m_my_pos_seq.resize_array(m_seq_len);
00777     for (int32_t i=0; i<m_seq_len; i++)
00778         m_my_pos_seq[i]=my_pos_seq[i];
00779 }
00780 
00781 void CDynProg::set_dict_weights(SGMatrix<float64_t> dictionary_weights)
00782 {
00783     if (m_num_svms!=dictionary_weights.num_cols)
00784     {
00785         SG_ERROR( "m_dict_weights array does not match num_svms=%i!=%i\n",
00786                 m_num_svms, dictionary_weights.num_cols) ;
00787     }
00788 
00789     m_dict_weights.set_array(dictionary_weights.matrix, dictionary_weights.num_rows, m_num_svms, true, true) ;
00790 
00791     // initialize, so it does not bother when not used
00792     m_segment_loss.resize_array(m_max_a_id+1, m_max_a_id+1, 2) ;
00793     m_segment_loss.set_const(0) ;
00794     m_segment_ids.resize_array(m_observation_matrix.get_dim2()) ;
00795     m_segment_mask.resize_array(m_observation_matrix.get_dim2()) ;
00796     m_segment_ids.set_const(0) ;
00797     m_segment_mask.set_const(0) ;
00798 }
00799 
00800 void CDynProg::best_path_set_segment_loss(SGMatrix<float64_t> segment_loss)
00801 {
00802     int32_t m=segment_loss.num_rows;
00803     int32_t n=segment_loss.num_cols;
00804     // here we need two matrices. Store it in one: 2N x N
00805     if (2*m!=n)
00806         SG_ERROR( "segment_loss should be 2 x quadratic matrix: %i!=%i\n", 2*m, n) ;
00807 
00808     if (m!=m_max_a_id+1)
00809         SG_ERROR( "segment_loss size should match m_max_a_id: %i!=%i\n", m, m_max_a_id+1) ;
00810 
00811     m_segment_loss.set_array(segment_loss.matrix, m, n/2, 2, true, true) ;
00812     /*for (int32_t i=0; i<n; i++)
00813         for (int32_t j=0; j<n; j++)
00814         SG_DEBUG( "loss(%i,%i)=%f\n", i,j, m_segment_loss.element(0,i,j)) ;*/
00815 }
00816 
00817 void CDynProg::best_path_set_segment_ids_mask(
00818     int32_t* segment_ids, float64_t* segment_mask, int32_t m)
00819 {
00820 
00821     if (m!=m_observation_matrix.get_dim2())
00822         SG_ERROR("size of segment_ids or segment_mask (%i)  does not match the size of the feature matrix (%i)", m, m_observation_matrix.get_dim2());
00823     int32_t max_id = 0;
00824     for (int32_t i=1;i<m;i++)
00825         max_id = CMath::max(max_id,segment_ids[i]);
00826     //SG_PRINT("max_id: %i, m:%i\n",max_id, m);
00827     m_segment_ids.set_array(segment_ids, m, true, true) ;
00828     m_segment_ids.set_array_name("m_segment_ids");
00829     m_segment_mask.set_array(segment_mask, m, true, true) ;
00830     m_segment_mask.set_array_name("m_segment_mask");
00831 
00832     m_seg_loss_obj->set_segment_mask(&m_segment_mask);
00833     m_seg_loss_obj->set_segment_ids(&m_segment_ids);
00834     m_seg_loss_obj->compute_loss(m_pos.get_array(), m_seq_len);
00835 }
00836 
00837 SGVector<float64_t> CDynProg::get_scores()
00838 {
00839     SGVector<float64_t> scores(m_scores.get_dim1());
00840     memcpy(scores.vector,m_scores.get_array(), sizeof(float64_t)*(m_scores.get_dim1()));
00841 
00842     return scores;
00843 }
00844 
00845 SGMatrix<int32_t> CDynProg::get_states()
00846 {
00847     SGMatrix<int32_t> states(m_states.get_dim1(), m_states.get_dim2());
00848 
00849     int32_t sz = sizeof(int32_t)*( m_states.get_dim1() * m_states.get_dim2() );
00850     memcpy(states.matrix ,m_states.get_array(),sz);
00851 
00852     return states;
00853 }
00854 
00855 SGMatrix<int32_t> CDynProg::get_positions()
00856 {
00857    SGMatrix<int32_t> positions(m_positions.get_dim1(), m_positions.get_dim2());
00858 
00859    int32_t sz = sizeof(int32_t)*(m_positions.get_dim1()*m_positions.get_dim2());
00860    memcpy(positions.matrix, m_positions.get_array(),sz);
00861 
00862    return positions;
00863 }
00864 
00865 void CDynProg::get_path_scores(float64_t** scores, int32_t* seq_len)
00866 {
00867    ASSERT(scores && seq_len);
00868 
00869    *seq_len=m_my_scores.get_dim1();
00870 
00871    int32_t sz = sizeof(float64_t)*(*seq_len);
00872 
00873    *scores = SG_MALLOC(float64_t, *seq_len);
00874    ASSERT(*scores);
00875 
00876    memcpy(*scores,m_my_scores.get_array(),sz);
00877 }
00878 
00879 void CDynProg::get_path_losses(float64_t** losses, int32_t* seq_len)
00880 {
00881     ASSERT(losses && seq_len);
00882 
00883     *seq_len=m_my_losses.get_dim1();
00884 
00885    int32_t sz = sizeof(float64_t)*(*seq_len);
00886 
00887    *losses = SG_MALLOC(float64_t, *seq_len);
00888    ASSERT(*losses);
00889 
00890    memcpy(*losses,m_my_losses.get_array(),sz);
00891 }
00892 
00894 
00895 bool CDynProg::extend_orf(
00896     int32_t orf_from, int32_t orf_to, int32_t start, int32_t &last_pos,
00897     int32_t to)
00898 {
00899 #ifdef DYNPROG_TIMING_DETAIL
00900     MyTime.start() ;
00901 #endif
00902 
00903     if (start<0)
00904         start=0 ;
00905     if (to<0)
00906         to=0 ;
00907 
00908     int32_t orf_target = orf_to-orf_from ;
00909     if (orf_target<0) orf_target+=3 ;
00910 
00911     int32_t pos ;
00912     if (last_pos==to)
00913         pos = to-orf_to-3 ;
00914     else
00915         pos=last_pos ;
00916 
00917     if (pos<0)
00918     {
00919 #ifdef DYNPROG_TIMING_DETAIL
00920         MyTime.stop() ;
00921         orf_time += MyTime.time_diff_sec() ;
00922 #endif
00923         return true ;
00924     }
00925 
00926     for (; pos>=start; pos-=3)
00927         if (m_genestr_stop[pos])
00928         {
00929 #ifdef DYNPROG_TIMING_DETAIL
00930             MyTime.stop() ;
00931             orf_time += MyTime.time_diff_sec() ;
00932 #endif
00933             return false ;
00934         }
00935 
00936 
00937     last_pos = CMath::min(pos+3,to-orf_to-3) ;
00938 
00939 #ifdef DYNPROG_TIMING_DETAIL
00940     MyTime.stop() ;
00941     orf_time += MyTime.time_diff_sec() ;
00942 #endif
00943     return true ;
00944 }
00945 
00946 void CDynProg::compute_nbest_paths(int32_t max_num_signals, bool use_orf,
00947         int16_t nbest, bool with_loss, bool with_multiple_sequences)
00948     {
00949 
00950     //FIXME we need checks here if all the fields are of right size
00951     //SG_PRINT("m_seq_len: %i\n", m_seq_len);
00952     //SG_PRINT("m_pos[0]: %i\n", m_pos[0]);
00953     //SG_PRINT("\n");
00954 
00955     //FIXME these variables can go away when compute_nbest_paths uses them
00956     //instead of the local pointers below
00957     const float64_t* seq_array = m_observation_matrix.get_array();
00958     m_scores.resize_array(nbest) ;
00959     m_states.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00960     m_positions.resize_array(nbest, m_observation_matrix.get_dim2()) ;
00961 
00962     for (int32_t i=0; i<nbest; i++)
00963     {
00964         m_scores[i]=-1;
00965         for (int32_t j=0; j<m_observation_matrix.get_dim2(); j++)
00966         {
00967             m_states.element(i,j)=-1;
00968             m_positions.element(i,j)=-1;
00969         }
00970     }
00971     float64_t* prob_nbest=m_scores.get_array();
00972     int32_t* my_state_seq=m_states.get_array();
00973     int32_t* my_pos_seq=m_positions.get_array();
00974 
00975     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
00976     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
00977     //END FIXME
00978 
00979 
00980 #ifdef DYNPROG_TIMING
00981         segment_init_time = 0.0 ;
00982         segment_pos_time = 0.0 ;
00983         segment_extend_time = 0.0 ;
00984         segment_clean_time = 0.0 ;
00985         orf_time = 0.0 ;
00986         svm_init_time = 0.0 ;
00987         svm_pos_time = 0.0 ;
00988         svm_clean_time = 0.0 ;
00989         inner_loop_time = 0.0 ;
00990         content_svm_values_time = 0.0 ;
00991         content_plifs_time = 0.0 ;
00992         inner_loop_max_time = 0.0 ;
00993         long_transition_time = 0.0 ;
00994 
00995         MyTime2.start() ;
00996 #endif
00997 
00998         if (!m_svm_arrays_clean)
00999         {
01000             SG_ERROR( "SVM arrays not clean") ;
01001             return ;
01002         }
01003 
01004 #ifdef DYNPROG_DEBUG
01005         m_transition_matrix_a.set_array_name("transition_matrix");
01006         m_transition_matrix_a.display_array();
01007         m_mod_words.display_array() ;
01008         m_sign_words.display_array() ;
01009         m_string_words.display_array() ;
01010         //SG_PRINT("use_orf = %i\n", use_orf) ;
01011 #endif
01012 
01013         int32_t max_look_back = 1000 ;
01014         bool use_svm = false ;
01015 
01016         SG_DEBUG("m_N:%i, m_seq_len:%i, max_num_signals:%i\n",m_N, m_seq_len, max_num_signals) ;
01017 
01018         //for (int32_t i=0;i<m_N*m_seq_len*max_num_signals;i++)
01019       //   SG_PRINT("(%i)%0.2f ",i,seq_array[i]);
01020 
01021         CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
01022         PEN.set_array_name("PEN");
01023 
01024         CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d,  CPlifBase*
01025         PEN_state_signals.set_array_name("state_signals");
01026 
01027         CDynamicArray<float64_t> seq(m_N, m_seq_len) ; // 2d
01028         seq.set_array_name("seq") ;
01029         seq.set_const(0) ;
01030 
01031 #ifdef DYNPROG_DEBUG
01032         SG_PRINT("m_num_raw_data: %i\n",m_num_raw_data);
01033         SG_PRINT("m_num_intron_plifs: %i\n", m_num_intron_plifs);
01034         SG_PRINT("m_num_svms: %i\n", m_num_svms);
01035         SG_PRINT("m_num_lin_feat_plifs_cum: ");
01036         for (int i=0; i<=m_num_raw_data; i++)
01037             SG_PRINT(" %i  ",m_num_lin_feat_plifs_cum[i]);
01038         SG_PRINT("\n");
01039 #endif
01040 
01041         float64_t* svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01042         { // initialize svm_svalue
01043             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01044                 svm_value[s]=0 ;
01045         }
01046 
01047         { // convert seq_input to seq
01048             // this is independent of the svm values
01049 
01050             CDynamicArray<float64_t> *seq_input=NULL ; // 3d
01051             if (seq_array!=NULL)
01052             {
01053                 //SG_PRINT("using dense seq_array\n") ;
01054 
01055                 seq_input=new CDynamicArray<float64_t>(seq_array, m_N, m_seq_len, max_num_signals) ;
01056                 seq_input->set_array_name("seq_input") ;
01057                 //seq_input.display_array() ;
01058 
01059                 ASSERT(m_seq_sparse1==NULL) ;
01060                 ASSERT(m_seq_sparse2==NULL) ;
01061             } else
01062             {
01063                 SG_PRINT("using sparse seq_array\n") ;
01064 
01065                 ASSERT(m_seq_sparse1!=NULL) ;
01066                 ASSERT(m_seq_sparse2!=NULL) ;
01067                 ASSERT(max_num_signals==2) ;
01068             }
01069 
01070             for (int32_t i=0; i<m_N; i++)
01071                 for (int32_t j=0; j<m_seq_len; j++)
01072                     seq.element(i,j) = 0 ;
01073 
01074             for (int32_t i=0; i<m_N; i++)
01075                 for (int32_t j=0; j<m_seq_len; j++)
01076                     for (int32_t k=0; k<max_num_signals; k++)
01077                     {
01078                         if ((PEN_state_signals.element(i,k)==NULL) && (k==0))
01079                         {
01080                             // no plif
01081                             if (seq_input!=NULL)
01082                                 seq.element(i,j) = seq_input->element(i,j,k) ;
01083                             else
01084                             {
01085                                 if (k==0)
01086                                     seq.element(i,j) = m_seq_sparse1->get_feature(i,j) ;
01087                                 if (k==1)
01088                                     seq.element(i,j) = m_seq_sparse2->get_feature(i,j) ;
01089                             }
01090                             break ;
01091                         }
01092                         if (PEN_state_signals.element(i,k)!=NULL)
01093                         {
01094                             if (seq_input!=NULL)
01095                             {
01096                                 // just one plif
01097                                 if (CMath::is_finite(seq_input->element(i,j,k)))
01098                                     seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(seq_input->element(i,j,k), svm_value) ;
01099                                 else
01100                                     // keep infinity values
01101                                     seq.element(i,j) = seq_input->element(i, j, k) ;
01102                             }
01103                             else
01104                             {
01105                                 if (k==0)
01106                                 {
01107                                     // just one plif
01108                                     if (CMath::is_finite(m_seq_sparse1->get_feature(i,j)))
01109                                         seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse1->get_feature(i,j), svm_value) ;
01110                                     else
01111                                         // keep infinity values
01112                                         seq.element(i,j) = m_seq_sparse1->get_feature(i, j) ;
01113                                 }
01114                                 if (k==1)
01115                                 {
01116                                     // just one plif
01117                                     if (CMath::is_finite(m_seq_sparse2->get_feature(i,j)))
01118                                         seq.element(i,j) += ((CPlifBase*) PEN_state_signals.element(i,k))->lookup_penalty(m_seq_sparse2->get_feature(i,j), svm_value) ;
01119                                     else
01120                                         // keep infinity values
01121                                         seq.element(i,j) = m_seq_sparse2->get_feature(i, j) ;
01122                                 }
01123                             }
01124                         }
01125                         else
01126                             break ;
01127                     }
01128             delete seq_input;
01129             SG_FREE(svm_value);
01130         }
01131 
01132         // allow longer transitions than look_back
01133         bool long_transitions = m_long_transitions ;
01134         CDynamicArray<int32_t> long_transition_content_start_position(m_N,m_N) ; // 2d
01135         long_transition_content_start_position.set_array_name("long_transition_content_start_position");
01136 #ifdef DYNPROG_DEBUG
01137         CDynamicArray<int32_t> long_transition_content_end_position(m_N,m_N) ; // 2d
01138         long_transition_content_end_position.set_array_name("long_transition_content_end_position");
01139 #endif
01140         CDynamicArray<int32_t> long_transition_content_start(m_N,m_N) ; // 2d
01141         long_transition_content_start.set_array_name("long_transition_content_start");
01142 
01143         CDynamicArray<float64_t> long_transition_content_scores(m_N,m_N) ; // 2d
01144         long_transition_content_scores.set_array_name("long_transition_content_scores");
01145 #ifdef DYNPROG_DEBUG
01146 
01147         CDynamicArray<float64_t> long_transition_content_scores_pen(m_N,m_N) ; // 2d
01148         long_transition_content_scores_pen.set_array_name("long_transition_content_scores_pen");
01149 
01150         CDynamicArray<float64_t> long_transition_content_scores_prev(m_N,m_N) ; // 2d
01151         long_transition_content_scores_prev.set_array_name("long_transition_content_scores_prev");
01152 
01153         CDynamicArray<float64_t> long_transition_content_scores_elem(m_N,m_N) ; // 2d
01154         long_transition_content_scores_elem.set_array_name("long_transition_content_scores_elem");
01155 #endif
01156         CDynamicArray<float64_t> long_transition_content_scores_loss(m_N,m_N) ; // 2d
01157         long_transition_content_scores_loss.set_array_name("long_transition_content_scores_loss");
01158 
01159         if (nbest!=1)
01160         {
01161             SG_ERROR("Long transitions are not supported for nbest!=1") ;
01162             long_transitions = false ;
01163         }
01164         long_transition_content_scores.set_const(-CMath::INFTY);
01165 #ifdef DYNPROG_DEBUG
01166         long_transition_content_scores_pen.set_const(0) ;
01167         long_transition_content_scores_elem.set_const(0) ;
01168         long_transition_content_scores_prev.set_const(0) ;
01169 #endif
01170         if (with_loss)
01171             long_transition_content_scores_loss.set_const(0) ;
01172         long_transition_content_start.set_const(0) ;
01173         long_transition_content_start_position.set_const(0) ;
01174 #ifdef DYNPROG_DEBUG
01175         long_transition_content_end_position.set_const(0) ;
01176 #endif
01177 
01178         svm_value = SG_MALLOC(float64_t , m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
01179         { // initialize svm_svalue
01180             for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
01181                 svm_value[s]=0 ;
01182         }
01183 
01184         CDynamicArray<int32_t> look_back(m_N,m_N) ; // 2d
01185         look_back.set_array_name("look_back");
01186         //CDynamicArray<int32_t> look_back_orig(m_N,m_N) ;
01187         //look_back.set_array_name("look_back_orig");
01188 
01189 
01190         { // determine maximal length of look-back
01191             for (int32_t i=0; i<m_N; i++)
01192                 for (int32_t j=0; j<m_N; j++)
01193                 {
01194                     look_back.set_element(INT_MAX, i, j) ;
01195                     //look_back_orig.set_element(INT_MAX, i, j) ;
01196                 }
01197 
01198             for (int32_t j=0; j<m_N; j++)
01199             {
01200                 // only consider transitions that are actually allowed
01201                 const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01202                 const T_STATES *elem_list = trans_list_forward[j] ;
01203 
01204                 for (int32_t i=0; i<num_elem; i++)
01205                 {
01206                     T_STATES ii = elem_list[i] ;
01207 
01208                     CPlifBase *penij=(CPlifBase*) PEN.element(j, ii) ;
01209                     if (penij==NULL)
01210                     {
01211                         if (long_transitions)
01212                         {
01213                             look_back.set_element(m_long_transition_threshold, j, ii) ;
01214                             //look_back_orig.set_element(m_long_transition_max, j, ii) ;
01215                         }
01216                         continue ;
01217                     }
01218 
01219                     /* if the transition is an ORF or we do computation with loss, we have to disable long transitions */
01220                     if ((m_orf_info.element(ii,0)!=-1) || (m_orf_info.element(j,1)!=-1) || (!long_transitions))
01221                     {
01222                         look_back.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01223                         //look_back_orig.set_element(CMath::ceil(penij->get_max_value()), j, ii) ;
01224                         if (CMath::ceil(penij->get_max_value()) > max_look_back)
01225                         {
01226                             SG_DEBUG( "%d %d -> value: %f\n", ii,j,penij->get_max_value());
01227                             max_look_back = (int32_t) (CMath::ceil(penij->get_max_value()));
01228                         }
01229                     }
01230                     else
01231                     {
01232                         look_back.set_element(CMath::min( (int32_t)CMath::ceil(penij->get_max_value()), m_long_transition_threshold ), j, ii) ;
01233                         //look_back_orig.set_element( (int32_t)CMath::ceil(penij->get_max_value()), j, ii) ;
01234                     }
01235 
01236                     if (penij->uses_svm_values())
01237                         use_svm=true ;
01238                 }
01239             }
01240             /* make sure max_look_back is at least as long as a long transition */
01241             if (long_transitions)
01242                 max_look_back = CMath::max(m_long_transition_threshold, max_look_back) ;
01243 
01244             /* make sure max_look_back is not longer than the whole string */
01245             max_look_back = CMath::min(m_genestr.get_dim1(), max_look_back) ;
01246 
01247             int32_t num_long_transitions = 0 ;
01248             for (int32_t i=0; i<m_N; i++)
01249                 for (int32_t j=0; j<m_N; j++)
01250                 {
01251                     if (look_back.get_element(i,j)==m_long_transition_threshold)
01252                         num_long_transitions++ ;
01253                     if (look_back.get_element(i,j)==INT_MAX)
01254                     {
01255                         if (long_transitions)
01256                         {
01257                             look_back.set_element(m_long_transition_threshold, i, j) ;
01258                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01259                         }
01260                         else
01261                         {
01262                             look_back.set_element(max_look_back, i, j) ;
01263                             //look_back_orig.set_element(m_long_transition_max, i, j) ;
01264                         }
01265                     }
01266                 }
01267             SG_DEBUG("Using %i long transitions\n", num_long_transitions) ;
01268         }
01269         //SG_PRINT("max_look_back: %i \n", max_look_back) ;
01270 
01271         //SG_PRINT("use_svm=%i, genestr_len: \n", use_svm, m_genestr.get_dim1()) ;
01272         SG_DEBUG("use_svm=%i\n", use_svm) ;
01273 
01274         SG_DEBUG("maxlook: %d m_N: %d nbest: %d \n", max_look_back, m_N, nbest);
01275         const int32_t look_back_buflen = (max_look_back*m_N+1)*nbest ;
01276         SG_DEBUG("look_back_buflen=%i\n", look_back_buflen) ;
01277         /*const float64_t mem_use = (float64_t)(m_seq_len*m_N*nbest*(sizeof(T_STATES)+sizeof(int16_t)+sizeof(int32_t))+
01278           look_back_buflen*(2*sizeof(float64_t)+sizeof(int32_t))+
01279           m_seq_len*(sizeof(T_STATES)+sizeof(int32_t))+
01280           m_genestr.get_dim1()*sizeof(bool))/(1024*1024);*/
01281 
01282         //bool is_big = (mem_use>200) || (m_seq_len>5000) ;
01283 
01284         /*if (is_big)
01285           {
01286           SG_DEBUG("calling compute_nbest_paths: m_seq_len=%i, m_N=%i, lookback=%i nbest=%i\n",
01287           m_seq_len, m_N, max_look_back, nbest) ;
01288           SG_DEBUG("allocating %1.2fMB of memory\n",
01289           mem_use) ;
01290           }*/
01291         ASSERT(nbest<32000) ;
01292 
01293         CDynamicArray<float64_t> delta(m_seq_len, m_N, nbest) ; // 3d
01294         delta.set_array_name("delta");
01295         float64_t* delta_array = delta.get_array() ;
01296         //delta.set_const(0) ;
01297 
01298         CDynamicArray<T_STATES> psi(m_seq_len, m_N, nbest) ; // 3d
01299         psi.set_array_name("psi");
01300         //psi.set_const(0) ;
01301 
01302         CDynamicArray<int16_t> ktable(m_seq_len, m_N, nbest) ; // 3d
01303         ktable.set_array_name("ktable");
01304         //ktable.set_const(0) ;
01305 
01306         CDynamicArray<int32_t> ptable(m_seq_len, m_N, nbest) ; // 3d
01307         ptable.set_array_name("ptable");
01308         //ptable.set_const(0) ;
01309 
01310         CDynamicArray<float64_t> delta_end(nbest) ;
01311         delta_end.set_array_name("delta_end");
01312         //delta_end.set_const(0) ;
01313 
01314         CDynamicArray<T_STATES> path_ends(nbest) ;
01315         path_ends.set_array_name("path_ends");
01316         //path_ends.set_const(0) ;
01317 
01318         CDynamicArray<int16_t> ktable_end(nbest) ;
01319         ktable_end.set_array_name("ktable_end");
01320         //ktable_end.set_const(0) ;
01321 
01322         float64_t * fixedtempvv=SG_MALLOC(float64_t, look_back_buflen);
01323         memset(fixedtempvv, 0, look_back_buflen*sizeof(float64_t)) ;
01324         int32_t * fixedtempii=SG_MALLOC(int32_t, look_back_buflen);
01325         memset(fixedtempii, 0, look_back_buflen*sizeof(int32_t)) ;
01326 
01327         CDynamicArray<float64_t> oldtempvv(look_back_buflen) ;
01328         oldtempvv.set_array_name("oldtempvv");
01329 
01330         CDynamicArray<float64_t> oldtempvv2(look_back_buflen) ;
01331         oldtempvv2.set_array_name("oldtempvv2");
01332         //oldtempvv.set_const(0) ;
01333         //oldtempvv.display_size() ;
01334 
01335         CDynamicArray<int32_t> oldtempii(look_back_buflen) ;
01336         oldtempii.set_array_name("oldtempii");
01337 
01338         CDynamicArray<int32_t> oldtempii2(look_back_buflen) ;
01339         oldtempii2.set_array_name("oldtempii2");
01340         //oldtempii.set_const(0) ;
01341 
01342         CDynamicArray<T_STATES> state_seq(m_seq_len) ;
01343         state_seq.set_array_name("state_seq");
01344         //state_seq.set_const(0) ;
01345 
01346         CDynamicArray<int32_t> pos_seq(m_seq_len) ;
01347         pos_seq.set_array_name("pos_seq");
01348         //pos_seq.set_const(0) ;
01349 
01350 
01351         m_dict_weights.set_array_name("dict_weights") ;
01352         m_word_degree.set_array_name("word_degree") ;
01353         m_cum_num_words.set_array_name("cum_num_words") ;
01354         m_num_words.set_array_name("num_words") ;
01355         //word_used.set_array_name("word_used") ;
01356         //svm_values_unnormalized.set_array_name("svm_values_unnormalized") ;
01357         //m_svm_pos_start.set_array_name("svm_pos_start") ;
01358         m_num_unique_words.set_array_name("num_unique_words") ;
01359 
01360         PEN.set_array_name("PEN") ;
01361         seq.set_array_name("seq") ;
01362 
01363         delta.set_array_name("delta") ;
01364         psi.set_array_name("psi") ;
01365         ktable.set_array_name("ktable") ;
01366         ptable.set_array_name("ptable") ;
01367         delta_end.set_array_name("delta_end") ;
01368         path_ends.set_array_name("path_ends") ;
01369         ktable_end.set_array_name("ktable_end") ;
01370 
01371 #ifdef USE_TMP_ARRAYCLASS
01372         fixedtempvv.set_array_name("fixedtempvv") ;
01373         fixedtempii.set_array_name("fixedtempvv") ;
01374 #endif
01375 
01376         oldtempvv.set_array_name("oldtempvv") ;
01377         oldtempvv2.set_array_name("oldtempvv2") ;
01378         oldtempii.set_array_name("oldtempii") ;
01379         oldtempii2.set_array_name("oldtempii2") ;
01380 
01381 
01383 
01384 #ifdef DYNPROG_DEBUG
01385         state_seq.display_size() ;
01386         pos_seq.display_size() ;
01387 
01388         m_dict_weights.display_size() ;
01389         m_word_degree.display_array() ;
01390         m_cum_num_words.display_array() ;
01391         m_num_words.display_array() ;
01392         //word_used.display_size() ;
01393         //svm_values_unnormalized.display_size() ;
01394         //m_svm_pos_start.display_array() ;
01395         m_num_unique_words.display_array() ;
01396 
01397         PEN.display_size() ;
01398         PEN_state_signals.display_size() ;
01399         seq.display_size() ;
01400         m_orf_info.display_size() ;
01401 
01402         //m_genestr_stop.display_size() ;
01403         delta.display_size() ;
01404         psi.display_size() ;
01405         ktable.display_size() ;
01406         ptable.display_size() ;
01407         delta_end.display_size() ;
01408         path_ends.display_size() ;
01409         ktable_end.display_size() ;
01410 
01411 #ifdef USE_TMP_ARRAYCLASS
01412         fixedtempvv.display_size() ;
01413         fixedtempii.display_size() ;
01414 #endif
01415 
01416         //oldtempvv.display_size() ;
01417         //oldtempii.display_size() ;
01418 
01419         state_seq.display_size() ;
01420         pos_seq.display_size() ;
01421 
01422         //seq.set_const(0) ;
01423 
01424 #endif //DYNPROG_DEBUG
01425 
01427 
01428 
01429 
01430         {
01431             for (int32_t s=0; s<m_num_svms; s++)
01432                 ASSERT(m_string_words_array[s]<1)  ;
01433         }
01434 
01435 
01436         //CDynamicArray<int32_t*> trans_matrix_svms(m_N,m_N); // 2d
01437         //CDynamicArray<int32_t> trans_matrix_num_svms(m_N,m_N); // 2d
01438 
01439         { // initialization
01440 
01441             for (T_STATES i=0; i<m_N; i++)
01442             {
01443                 //delta.element(0, i, 0) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01444                 delta.element(delta_array, 0, i, 0, m_seq_len, m_N) = get_p(i) + seq.element(i,0) ;        // get_p defined in HMM.h to be equiv to initial_state_distribution
01445                 psi.element(0,i,0)   = 0 ;
01446                 if (nbest>1)
01447                     ktable.element(0,i,0)  = 0 ;
01448                 ptable.element(0,i,0)  = 0 ;
01449                 for (int16_t k=1; k<nbest; k++)
01450                 {
01451                     int32_t dim1, dim2, dim3 ;
01452                     delta.get_array_size(dim1, dim2, dim3) ;
01453                     //SG_DEBUG("i=%i, k=%i -- %i, %i, %i\n", i, k, dim1, dim2, dim3) ;
01454                     //delta.element(0, i, k)    = -CMath::INFTY ;
01455                     delta.element(delta_array, 0, i, k, m_seq_len, m_N)    = -CMath::INFTY ;
01456                     psi.element(0,i,0)      = 0 ;                  // <--- what's this for?
01457                     if (nbest>1)
01458                         ktable.element(0,i,k)     = 0 ;
01459                     ptable.element(0,i,k)     = 0 ;
01460                 }
01461                 /*
01462                    for (T_STATES j=0; j<m_N; j++)
01463                    {
01464                    CPlifBase * penalty = PEN.element(i,j) ;
01465                    int32_t num_current_svms=0;
01466                    int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
01467                    if (penalty)
01468                    {
01469                    SG_PRINT("trans %i -> %i \n",i,j);
01470                    penalty->get_used_svms(&num_current_svms, svm_ids);
01471                    trans_matrix_svms.set_element(svm_ids,i,j);
01472                    for (int32_t l=0;l<num_current_svms;l++)
01473                    SG_PRINT("svm_ids[%i]: %i \n",l,svm_ids[l]);
01474                    trans_matrix_num_svms.set_element(num_current_svms,i,j);
01475                    }
01476                    }
01477                    */
01478 
01479             }
01480         }
01481 
01482         SG_DEBUG("START_RECURSION \n\n");
01483 
01484         // recursion
01485         for (int32_t t=1; t<m_seq_len; t++)
01486         {
01487             //if (is_big && t%(1+(m_seq_len/1000))==1)
01488             //  SG_PROGRESS(t, 0, m_seq_len);
01489             //SG_PRINT("%i\n", t) ;
01490 
01491             for (T_STATES j=0; j<m_N; j++)
01492             {
01493                 if (seq.element(j,t)<=-1e20)
01494                 { // if we cannot observe the symbol here, then we can omit the rest
01495                     for (int16_t k=0; k<nbest; k++)
01496                     {
01497                         delta.element(delta_array, t, j, k, m_seq_len, m_N)    = seq.element(j,t) ;
01498                         psi.element(t,j,k)         = 0 ;
01499                         if (nbest>1)
01500                             ktable.element(t,j,k)  = 0 ;
01501                         ptable.element(t,j,k)      = 0 ;
01502                     }
01503                 }
01504                 else
01505                 {
01506                     const T_STATES num_elem   = trans_list_forward_cnt[j] ;
01507                     const T_STATES *elem_list = trans_list_forward[j] ;
01508                     const float64_t *elem_val      = trans_list_forward_val[j] ;
01509                     const int32_t *elem_id      = trans_list_forward_id[j] ;
01510 
01511                     int32_t fixed_list_len = 0 ;
01512                     float64_t fixedtempvv_ = CMath::INFTY ;
01513                     int32_t fixedtempii_ = 0 ;
01514                     bool fixedtemplong = false ;
01515 
01516                     for (int32_t i=0; i<num_elem; i++)
01517                     {
01518                         T_STATES ii = elem_list[i] ;
01519 
01520                         const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01521 
01522                         /*int32_t look_back = max_look_back ;
01523                           if (0)
01524                           { // find lookback length
01525                           CPlifBase *pen = (CPlifBase*) penalty ;
01526                           if (pen!=NULL)
01527                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01528                           if (look_back>=1e6)
01529                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01530                           ASSERT(look_back<1e6);
01531                           } */
01532 
01533                         int32_t look_back_ = look_back.element(j, ii) ;
01534 
01535                         int32_t orf_from = m_orf_info.element(ii,0) ;
01536                         int32_t orf_to   = m_orf_info.element(j,1) ;
01537                         if((orf_from!=-1)!=(orf_to!=-1))
01538                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01539                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01540 
01541                         int32_t orf_target = -1 ;
01542                         if (orf_from!=-1)
01543                         {
01544                             orf_target=orf_to-orf_from ;
01545                             if (orf_target<0)
01546                                 orf_target+=3 ;
01547                             ASSERT(orf_target>=0 && orf_target<3) ;
01548                         }
01549 
01550                         int32_t orf_last_pos = m_pos[t] ;
01551 #ifdef DYNPROG_TIMING
01552                         MyTime3.start() ;
01553 #endif
01554                         int32_t num_ok_pos = 0 ;
01555 
01556                         for (int32_t ts=t-1; ts>=0 && m_pos[t]-m_pos[ts]<=look_back_; ts--)
01557                         {
01558                             bool ok ;
01559                             //int32_t plen=t-ts;
01560 
01561                             /*for (int32_t s=0; s<m_num_svms; s++)
01562                               if ((fabs(svs.svm_values[s*svs.seqlen+plen]-svs2.svm_values[s*svs.seqlen+plen])>1e-6) ||
01563                               (fabs(svs.svm_values[s*svs.seqlen+plen]-svs3.svm_values[s*svs.seqlen+plen])>1e-6))
01564                               {
01565                               SG_DEBUG( "s=%i, t=%i, ts=%i, %1.5e, %1.5e, %1.5e\n", s, t, ts, svs.svm_values[s*svs.seqlen+plen], svs2.svm_values[s*svs.seqlen+plen], svs3.svm_values[s*svs.seqlen+plen]);
01566                               }*/
01567 
01568                             if (orf_target==-1)
01569                                 ok=true ;
01570                             else if (m_pos[ts]!=-1 && (m_pos[t]-m_pos[ts])%3==orf_target)
01571                                 ok=(!use_orf) || extend_orf(orf_from, orf_to, m_pos[ts], orf_last_pos, m_pos[t]) ;
01572                             else
01573                                 ok=false ;
01574 
01575                             if (ok)
01576                             {
01577 
01578                                 float64_t segment_loss = 0.0 ;
01579                                 if (with_loss)
01580                                 {
01581                                     segment_loss = m_seg_loss_obj->get_segment_loss(ts, t, elem_id[i]);
01582                                     //if (segment_loss!=segment_loss2)
01583                                         //SG_PRINT("segment_loss:%f segment_loss2:%f\n", segment_loss, segment_loss2);
01584                                 }
01586                                 // BEST_PATH_TRANS
01588 
01589                                 int32_t frame = orf_from;//m_orf_info.element(ii,0);
01590                                 lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01591 
01592                                 float64_t pen_val = 0.0 ;
01593                                 if (penalty)
01594                                 {
01595 #ifdef DYNPROG_TIMING_DETAIL
01596                                     MyTime.start() ;
01597 #endif
01598                                     pen_val = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01599 
01600 #ifdef DYNPROG_TIMING_DETAIL
01601                                     MyTime.stop() ;
01602                                     content_plifs_time += MyTime.time_diff_sec() ;
01603 #endif
01604                                 }
01605 
01606 #ifdef DYNPROG_TIMING_DETAIL
01607                                 MyTime.start() ;
01608 #endif
01609                                 num_ok_pos++ ;
01610 
01611                                 if (nbest==1)
01612                                 {
01613                                     float64_t  val        = elem_val[i] + pen_val ;
01614                                     if (with_loss)
01615                                         val              += segment_loss ;
01616 
01617                                     float64_t mval = -(val + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N)) ;
01618 
01619                                     if (mval<fixedtempvv_)
01620                                     {
01621                                         fixedtempvv_ = mval ;
01622                                         fixedtempii_ = ii + ts*m_N;
01623                                         fixed_list_len = 1 ;
01624                                         fixedtemplong = false ;
01625                                     }
01626                                 }
01627                                 else
01628                                 {
01629                                     for (int16_t diff=0; diff<nbest; diff++)
01630                                     {
01631                                         float64_t  val        = elem_val[i]  ;
01632                                         val                  += pen_val ;
01633                                         if (with_loss)
01634                                             val              += segment_loss ;
01635 
01636                                         float64_t mval = -(val + delta.element(delta_array, ts, ii, diff, m_seq_len, m_N)) ;
01637 
01638                                         /* only place -val in fixedtempvv if it is one of the nbest lowest values in there */
01639                                         /* fixedtempvv[i], i=0:nbest-1, is sorted so that fixedtempvv[0] <= fixedtempvv[1] <= ...*/
01640                                         /* fixed_list_len has the number of elements in fixedtempvv */
01641 
01642                                         if ((fixed_list_len < nbest) || ((0==fixed_list_len) || (mval < fixedtempvv[fixed_list_len-1])))
01643                                         {
01644                                             if ( (fixed_list_len<nbest) && ((0==fixed_list_len) || (mval>fixedtempvv[fixed_list_len-1])) )
01645                                             {
01646                                                 fixedtempvv[fixed_list_len] = mval ;
01647                                                 fixedtempii[fixed_list_len] = ii + diff*m_N + ts*m_N*nbest;
01648                                                 fixed_list_len++ ;
01649                                             }
01650                                             else  // must have mval < fixedtempvv[fixed_list_len-1]
01651                                             {
01652                                                 int32_t addhere = fixed_list_len;
01653                                                 while ((addhere > 0) && (mval < fixedtempvv[addhere-1]))
01654                                                     addhere--;
01655 
01656                                                 // move everything from addhere+1 one forward
01657                                                 for (int32_t jj=fixed_list_len-1; jj>addhere; jj--)
01658                                                 {
01659                                                     fixedtempvv[jj] = fixedtempvv[jj-1];
01660                                                     fixedtempii[jj] = fixedtempii[jj-1];
01661                                                 }
01662 
01663                                                 fixedtempvv[addhere] = mval;
01664                                                 fixedtempii[addhere] = ii + diff*m_N + ts*m_N*nbest;
01665 
01666                                                 if (fixed_list_len < nbest)
01667                                                     fixed_list_len++;
01668                                             }
01669                                         }
01670                                     }
01671                                 }
01672 #ifdef DYNPROG_TIMING_DETAIL
01673                                 MyTime.stop() ;
01674                                 inner_loop_max_time += MyTime.time_diff_sec() ;
01675 #endif
01676                             }
01677                         }
01678 #ifdef DYNPROG_TIMING
01679                         MyTime3.stop() ;
01680                         inner_loop_time += MyTime3.time_diff_sec() ;
01681 #endif
01682                     }
01683                     for (int32_t i=0; i<num_elem; i++)
01684                     {
01685                         T_STATES ii = elem_list[i] ;
01686 
01687                         const CPlifBase* penalty = (CPlifBase*) PEN.element(j,ii) ;
01688 
01689                         /*int32_t look_back = max_look_back ;
01690                           if (0)
01691                           { // find lookback length
01692                           CPlifBase *pen = (CPlifBase*) penalty ;
01693                           if (pen!=NULL)
01694                           look_back=(int32_t) (CMath::ceil(pen->get_max_value()));
01695                           if (look_back>=1e6)
01696                           SG_PRINT("%i,%i -> %d from %ld\n", j, ii, look_back, (long)pen) ;
01697                           ASSERT(look_back<1e6);
01698                           } */
01699 
01700                         int32_t look_back_ = look_back.element(j, ii) ;
01701                         //int32_t look_back_orig_ = look_back_orig.element(j, ii) ;
01702 
01703                         int32_t orf_from = m_orf_info.element(ii,0) ;
01704                         int32_t orf_to   = m_orf_info.element(j,1) ;
01705                         if((orf_from!=-1)!=(orf_to!=-1))
01706                             SG_DEBUG("j=%i  ii=%i  orf_from=%i orf_to=%i p=%1.2f\n", j, ii, orf_from, orf_to, elem_val[i]) ;
01707                         ASSERT((orf_from!=-1)==(orf_to!=-1)) ;
01708 
01709                         int32_t orf_target = -1 ;
01710                         if (orf_from!=-1)
01711                         {
01712                             orf_target=orf_to-orf_from ;
01713                             if (orf_target<0)
01714                                 orf_target+=3 ;
01715                             ASSERT(orf_target>=0 && orf_target<3) ;
01716                         }
01717 
01718                         //int32_t loss_last_pos = t ;
01719                         //float64_t last_loss = 0.0 ;
01720 
01721 #ifdef DYNPROG_TIMING
01722                         MyTime3.start() ;
01723 #endif
01724 
01725                         /* long transition stuff */
01726                         /* only do this, if
01727                          * this feature is enabled
01728                          * this is not a transition with ORF restrictions
01729                          * the loss is switched off
01730                          * nbest=1
01731                          */
01732 #ifdef DYNPROG_TIMING
01733                         MyTime3.start() ;
01734 #endif
01735                         // long transitions, only when not considering ORFs
01736                         if ( long_transitions && orf_target==-1 && look_back_ == m_long_transition_threshold )
01737                         {
01738 
01739                             // update table for 5' part  of the long segment
01740 
01741                             int32_t start = long_transition_content_start.get_element(ii, j) ;
01742                             int32_t end_5p_part = start ;
01743                             for (int32_t start_5p_part=start; m_pos[t]-m_pos[start_5p_part] > m_long_transition_threshold ; start_5p_part++)
01744                             {
01745                                 // find end_5p_part, which is greater than start_5p_part and at least m_long_transition_threshold away
01746                                 while (end_5p_part<=t && m_pos[end_5p_part+1]-m_pos[start_5p_part]<=m_long_transition_threshold)
01747                                     end_5p_part++ ;
01748 
01749                                 ASSERT(m_pos[end_5p_part+1]-m_pos[start_5p_part] > m_long_transition_threshold || end_5p_part==t) ;
01750                                 ASSERT(m_pos[end_5p_part]-m_pos[start_5p_part] <= m_long_transition_threshold) ;
01751 
01752                                 float64_t pen_val = 0.0;
01753                                 /* recompute penalty, if necessary */
01754                                 if (penalty)
01755                                 {
01756                                     int32_t frame = m_orf_info.element(ii,0);
01757                                     lookup_content_svm_values(start_5p_part, end_5p_part, m_pos[start_5p_part], m_pos[end_5p_part], svm_value, frame); // * t -> end_5p_part
01758                                     pen_val = penalty->lookup_penalty(m_pos[end_5p_part]-m_pos[start_5p_part], svm_value) ;
01759                                 }
01760 
01761                                 /*if (m_pos[start_5p_part]==1003)
01762                                   {
01763                                   SG_PRINT("Part1: %i - %i   vs  %i - %i\n", m_pos[t], m_pos[ts], m_pos[end_5p_part], m_pos[start_5p_part]) ;
01764                                   SG_PRINT("Part1: ts=%i  t=%i  start_5p_part=%i  m_seq_len=%i\n", m_pos[ts], m_pos[t], m_pos[start_5p_part], m_seq_len) ;
01765                                   }*/
01766 
01767                                 float64_t mval_trans = -( elem_val[i] + pen_val*0.5 + delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N) ) ;
01768                                 //float64_t mval_trans = -( elem_val[i] + delta.element(delta_array, ts, ii, 0, m_seq_len, m_N) ) ; // enable this for the incomplete extra check
01769 
01770                                 float64_t segment_loss_part1=0.0 ;
01771                                 if (with_loss)
01772                                 {  // this is the loss from the start of the long segment (5' part + middle section)
01773 
01774                                     segment_loss_part1 = m_seg_loss_obj->get_segment_loss(start_5p_part /*long_transition_content_start_position.get_element(ii,j)*/, end_5p_part, elem_id[i]); // * unsure
01775 
01776                                     mval_trans -= segment_loss_part1 ;
01777                                 }
01778 
01779 
01780                                 if (0)//m_pos[end_5p_part] - m_pos[long_transition_content_start_position.get_element(ii, j)] > look_back_orig_/*m_long_transition_max*/)
01781                                 {
01782                                     // this restricts the maximal length of segments,
01783                                     // but the current implementation is not valid since the
01784                                     // long transition is discarded without loocking if there
01785                                     // is a second best long transition in between
01786                                     long_transition_content_scores.set_element(-CMath::INFTY, ii, j) ;
01787                                     long_transition_content_start_position.set_element(0, ii, j) ;
01788                                     if (with_loss)
01789                                         long_transition_content_scores_loss.set_element(0.0, ii, j) ;
01790 #ifdef DYNPROG_DEBUG
01791                                     long_transition_content_scores_pen.set_element(0.0, ii, j) ;
01792                                     long_transition_content_scores_elem.set_element(0.0, ii, j) ;
01793                                     long_transition_content_scores_prev.set_element(0.0, ii, j) ;
01794                                     long_transition_content_end_position.set_element(0, ii, j) ;
01795 #endif
01796                                 }
01797                                 if (with_loss)
01798                                 {
01799                                     float64_t old_loss = long_transition_content_scores_loss.get_element(ii, j) ;
01800                                     float64_t new_loss = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), end_5p_part, elem_id[i]);
01801                                     float64_t score = long_transition_content_scores.get_element(ii, j) - old_loss + new_loss ;
01802                                     long_transition_content_scores.set_element(score, ii, j) ;
01803                                     long_transition_content_scores_loss.set_element(new_loss, ii, j) ;
01804 #ifdef DYNPROG_DEBUG
01805                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01806 #endif
01807 
01808                                 }
01809                                 if (-long_transition_content_scores.get_element(ii, j) > mval_trans )
01810                                 {
01811                                     /* then the old long transition is either too far away or worse than the current one */
01812                                     long_transition_content_scores.set_element(-mval_trans, ii, j) ;
01813                                     long_transition_content_start_position.set_element(start_5p_part, ii, j) ;
01814                                     if (with_loss)
01815                                         long_transition_content_scores_loss.set_element(segment_loss_part1, ii, j) ;
01816 #ifdef DYNPROG_DEBUG
01817                                     long_transition_content_scores_pen.set_element(pen_val*0.5, ii, j) ;
01818                                     long_transition_content_scores_elem.set_element(elem_val[i], ii, j) ;
01819                                     long_transition_content_scores_prev.set_element(delta.element(delta_array, start_5p_part, ii, 0, m_seq_len, m_N), ii, j) ;
01820                                     /*ASSERT(fabs(long_transition_content_scores.get_element(ii, j)-(long_transition_content_scores_pen.get_element(ii, j) +
01821                                       long_transition_content_scores_elem.get_element(ii, j) +
01822                                       long_transition_content_scores_prev.get_element(ii, j)))<1e-6) ;*/
01823                                     long_transition_content_end_position.set_element(end_5p_part, ii, j) ;
01824 #endif
01825                                 }
01826                                 //
01827                                 // this sets the position where the search for better 5'parts is started the next time
01828                                 // whithout this the prediction takes ages
01829                                 //
01830                                 long_transition_content_start.set_element(start_5p_part, ii, j) ;
01831                             }
01832 
01833                             // consider the 3' part at the end of the long segment:
01834                             // * with length = m_long_transition_threshold
01835                             // * content prediction and loss only for this part
01836 
01837                             // find ts > 0 with distance from m_pos[t] greater m_long_transition_threshold
01838                             // precompute: only depends on t
01839                             int ts = t;
01840                             while (ts>0 && m_pos[t]-m_pos[ts-1] <= m_long_transition_threshold)
01841                                 ts-- ;
01842 
01843                             if (ts>0)
01844                             {
01845                                 ASSERT((m_pos[t]-m_pos[ts-1] > m_long_transition_threshold) && (m_pos[t]-m_pos[ts] <= m_long_transition_threshold)) ;
01846 
01847 
01848                                 /* only consider this transition, if the right position was found */
01849                                 float pen_val_3p = 0.0 ;
01850                                 if (penalty)
01851                                 {
01852                                     int32_t frame = orf_from ; //m_orf_info.element(ii, 0);
01853                                     lookup_content_svm_values(ts, t, m_pos[ts], m_pos[t], svm_value, frame);
01854                                     pen_val_3p = penalty->lookup_penalty(m_pos[t]-m_pos[ts], svm_value) ;
01855                                 }
01856 
01857                                 float64_t mval = -(long_transition_content_scores.get_element(ii, j) + pen_val_3p*0.5) ;
01858 
01859                                 {
01860 #ifdef DYNPROG_DEBUG
01861                                     float64_t segment_loss_part2=0.0 ;
01862                                     float64_t segment_loss_part1=0.0 ;
01863 #endif
01864                                     float64_t segment_loss_total=0.0 ;
01865 
01866                                     if (with_loss)
01867                                     {   // this is the loss for the 3' end fragment of the segment
01868                                         // (the 5' end and the middle section loss is already contained in mval)
01869 
01870 #ifdef DYNPROG_DEBUG
01871                                         // this is an alternative, which should be identical, if the loss is additive
01872                                         segment_loss_part2 = m_seg_loss_obj->get_segment_loss_extend(long_transition_content_end_position.get_element(ii,j), t, elem_id[i]);
01873                                         //mval -= segment_loss_part2 ;
01874                                         segment_loss_part1 = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), long_transition_content_end_position.get_element(ii,j), elem_id[i]);
01875 #endif
01876                                         segment_loss_total = m_seg_loss_obj->get_segment_loss(long_transition_content_start_position.get_element(ii,j), t, elem_id[i]);
01877                                         mval -= (segment_loss_total-long_transition_content_scores_loss.get_element(ii, j)) ;
01878                                     }
01879 
01880 #ifdef DYNPROG_DEBUG
01881                                     if (m_pos[t]==10108 ||m_pos[t]==12802 ||m_pos[t]== 12561)
01882                                     {
01883                                         SG_PRINT("Part2: %i,%i,%i: val=%1.6f  pen_val_3p*0.5=%1.6f (t=%i, ts=%i, ts-1=%i, ts+1=%i); scores=%1.6f (pen=%1.6f,prev=%1.6f,elem=%1.6f,loss=%1.1f), positions=%i,%i,%i,  loss=%1.1f/%1.1f (%i,%i)\n",
01884                                                  m_pos[t], j, ii, -mval, 0.5*pen_val_3p, m_pos[t], m_pos[ts], m_pos[ts-1], m_pos[ts+1],
01885                                                  long_transition_content_scores.get_element(ii, j),
01886                                                  long_transition_content_scores_pen.get_element(ii, j),
01887                                                  long_transition_content_scores_prev.get_element(ii, j),
01888                                                  long_transition_content_scores_elem.get_element(ii, j),
01889                                                  long_transition_content_scores_loss.get_element(ii, j),
01890                                                  m_pos[long_transition_content_start_position.get_element(ii,j)],
01891                                                  m_pos[long_transition_content_end_position.get_element(ii,j)],
01892                                                  m_pos[long_transition_content_start.get_element(ii,j)], segment_loss_part2, segment_loss_total, long_transition_content_start_position.get_element(ii,j), t) ;
01893                                         SG_PRINT("fixedtempvv_: %1.6f, from_state:%i from_pos:%i\n ",-fixedtempvv_, (fixedtempii_%m_N), m_pos[(fixedtempii_-(fixedtempii_%(m_N*nbest)))/(m_N*nbest)] );
01894                                     }
01895 
01896                                     if (fabs(segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total)>1e-3)
01897                                     {
01898                                         SG_ERROR("LOSS: total=%1.1f (%i-%i)  part1=%1.1f/%1.1f (%i-%i)  part2=%1.1f (%i-%i)  sum=%1.1f  diff=%1.1f\n",
01899                                                  segment_loss_total, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[t],
01900                                                  long_transition_content_scores_loss.get_element(ii, j), segment_loss_part1, m_pos[long_transition_content_start_position.get_element(ii,j)], m_pos[long_transition_content_end_position.get_element(ii,j)],
01901                                                  segment_loss_part2, m_pos[long_transition_content_end_position.get_element(ii,j)], m_pos[t],
01902                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j),
01903                                                  segment_loss_part2+long_transition_content_scores_loss.get_element(ii, j) - segment_loss_total) ;
01904                                     }
01905 #endif
01906                                 }
01907 
01908                                 // prefer simpler version to guarantee optimality
01909                                 //
01910                                 // original:
01911                                 /* if ((mval < fixedtempvv_) &&
01912                                     (m_pos[t] - m_pos[long_transition_content_start_position.get_element(ii, j)])<=look_back_orig_) */
01913                                 if (mval < fixedtempvv_)
01914                                 {
01915                                     /* then the long transition is better than the short one => replace it */
01916                                     int32_t fromtjk =  fixedtempii_ ;
01917                                     /*SG_PRINT("%i,%i: Long transition (%1.5f=-(%1.5f+%1.5f+%1.5f+%1.5f), %i) to m_pos %i better than short transition (%1.5f,%i) to m_pos %i \n",
01918                                       m_pos[t], j,
01919                                       mval, pen_val_3p*0.5, long_transition_content_scores_pen.get_element(ii, j), long_transition_content_scores_elem.get_element(ii, j), long_transition_content_scores_prev.get_element(ii, j), ii,
01920                                       m_pos[long_transition_content_position.get_element(ii, j)],
01921                                       fixedtempvv_, (fromtjk%m_N), m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]) ;*/
01922                                     ASSERT((fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)==0 || m_pos[(fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest)]>=m_pos[long_transition_content_start_position.get_element(ii, j)] || fixedtemplong) ;
01923 
01924                                     fixedtempvv_ = mval ;
01925                                     fixedtempii_ = ii + m_N*long_transition_content_start_position.get_element(ii, j) ;
01926                                     fixed_list_len = 1 ;
01927                                     fixedtemplong = true ;
01928                                 }
01929                             }
01930                         }
01931                     }
01932 #ifdef DYNPROG_TIMING
01933                     MyTime3.stop() ;
01934                     long_transition_time += MyTime3.time_diff_sec() ;
01935 #endif
01936 
01937 
01938                     int32_t numEnt = fixed_list_len;
01939 
01940                     float64_t minusscore;
01941                     int64_t fromtjk;
01942 
01943                     for (int16_t k=0; k<nbest; k++)
01944                     {
01945                         if (k<numEnt)
01946                         {
01947                             if (nbest==1)
01948                             {
01949                                 minusscore = fixedtempvv_ ;
01950                                 fromtjk = fixedtempii_ ;
01951                             }
01952                             else
01953                             {
01954                                 minusscore = fixedtempvv[k];
01955                                 fromtjk = fixedtempii[k];
01956                             }
01957 
01958                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -minusscore + seq.element(j,t);
01959                             psi.element(t,j,k)      = (fromtjk%m_N) ;
01960                             if (nbest>1)
01961                                 ktable.element(t,j,k)   = (fromtjk%(m_N*nbest)-psi.element(t,j,k))/m_N ;
01962                             ptable.element(t,j,k)   = (fromtjk-(fromtjk%(m_N*nbest)))/(m_N*nbest) ;
01963                         }
01964                         else
01965                         {
01966                             delta.element(delta_array, t, j, k, m_seq_len, m_N)    = -CMath::INFTY ;
01967                             psi.element(t,j,k)      = 0 ;
01968                             if (nbest>1)
01969                                 ktable.element(t,j,k)     = 0 ;
01970                             ptable.element(t,j,k)     = 0 ;
01971                         }
01972                     }
01973                 }
01974             }
01975         }
01976         { //termination
01977             int32_t list_len = 0 ;
01978             for (int16_t diff=0; diff<nbest; diff++)
01979             {
01980                 for (T_STATES i=0; i<m_N; i++)
01981                 {
01982                     oldtempvv[list_len] = -(delta.element(delta_array, (m_seq_len-1), i, diff, m_seq_len, m_N)+get_q(i)) ;
01983                     oldtempii[list_len] = i + diff*m_N ;
01984                     list_len++ ;
01985                 }
01986             }
01987 
01988             CMath::nmin(oldtempvv.get_array(), oldtempii.get_array(), list_len, nbest) ;
01989 
01990             for (int16_t k=0; k<nbest; k++)
01991             {
01992                 delta_end.element(k) = -oldtempvv[k] ;
01993                 path_ends.element(k) = (oldtempii[k]%m_N) ;
01994                 if (nbest>1)
01995                     ktable_end.element(k) = (oldtempii[k]-path_ends.element(k))/m_N ;
01996             }
01997 
01998 
01999         }
02000 
02001         { //state sequence backtracking
02002             for (int16_t k=0; k<nbest; k++)
02003             {
02004                 prob_nbest[k]= delta_end.element(k) ;
02005 
02006                 int32_t i         = 0 ;
02007                 state_seq[i]  = path_ends.element(k) ;
02008                 int16_t q   = 0 ;
02009                 if (nbest>1)
02010                     q=ktable_end.element(k) ;
02011                 pos_seq[i]    = m_seq_len-1 ;
02012 
02013                 while (pos_seq[i]>0)
02014                 {
02015                     ASSERT(i+1<m_seq_len);
02016                     //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02017                     state_seq[i+1] = psi.element(pos_seq[i], state_seq[i], q);
02018                     pos_seq[i+1]   = ptable.element(pos_seq[i], state_seq[i], q) ;
02019                     if (nbest>1)
02020                         q              = ktable.element(pos_seq[i], state_seq[i], q) ;
02021                     i++ ;
02022                 }
02023                 //SG_DEBUG("s=%i p=%i q=%i\n", state_seq[i], pos_seq[i], q) ;
02024                 int32_t num_states = i+1 ;
02025                 for (i=0; i<num_states;i++)
02026                 {
02027                     my_state_seq[i+k*m_seq_len] = state_seq[num_states-i-1] ;
02028                     my_pos_seq[i+k*m_seq_len]   = pos_seq[num_states-i-1] ;
02029                 }
02030                 if (num_states<m_seq_len)
02031                 {
02032                     my_state_seq[num_states+k*m_seq_len]=-1 ;
02033                     my_pos_seq[num_states+k*m_seq_len]=-1 ;
02034                 }
02035             }
02036         }
02037 
02038         //if (is_big)
02039         //  SG_PRINT( "DONE.     \n") ;
02040 
02041 
02042 #ifdef DYNPROG_TIMING
02043         MyTime2.stop() ;
02044 
02045         //if (is_big)
02046         SG_PRINT("Timing:  orf=%1.2f s \n Segment_init=%1.2f s Segment_pos=%1.2f s  Segment_extend=%1.2f s Segment_clean=%1.2f s\nsvm_init=%1.2f s  svm_pos=%1.2f  svm_clean=%1.2f\n  content_svm_values_time=%1.2f  content_plifs_time=%1.2f\ninner_loop_max_time=%1.2f inner_loop=%1.2f long_transition_time=%1.2f\n total=%1.2f\n", orf_time, segment_init_time, segment_pos_time, segment_extend_time, segment_clean_time, svm_init_time, svm_pos_time, svm_clean_time, content_svm_values_time, content_plifs_time, inner_loop_max_time, inner_loop_time, long_transition_time, MyTime2.time_diff_sec()) ;
02047 #endif
02048 
02049         SG_FREE(fixedtempvv);
02050         SG_FREE(fixedtempii);
02051     }
02052 
02053 
02054 void CDynProg::best_path_trans_deriv(
02055     int32_t *my_state_seq, int32_t *my_pos_seq,
02056     int32_t my_seq_len, const float64_t *seq_array, int32_t max_num_signals)
02057 {
02058     m_initial_state_distribution_p_deriv.resize_array(m_N) ;
02059     m_end_state_distribution_q_deriv.resize_array(m_N) ;
02060     m_transition_matrix_a_deriv.resize_array(m_N,m_N) ;
02061     //m_my_scores.resize_array(m_my_state_seq.get_array_size()) ;
02062     //m_my_losses.resize_array(m_my_state_seq.get_array_size()) ;
02063     m_my_scores.resize_array(my_seq_len);
02064     m_my_losses.resize_array(my_seq_len);
02065     float64_t* my_scores=m_my_scores.get_array();
02066     float64_t* my_losses=m_my_losses.get_array();
02067     CPlifBase** Plif_matrix=m_plif_matrices->get_plif_matrix();
02068     CPlifBase** Plif_state_signals=m_plif_matrices->get_state_signals();
02069 
02070     if (!m_svm_arrays_clean)
02071     {
02072         SG_ERROR( "SVM arrays not clean") ;
02073         return ;
02074     } ;
02075     //SG_PRINT( "genestr_len=%i, genestr_num=%i\n", genestr_len, genestr_num) ;
02076     //m_mod_words.display() ;
02077     //m_sign_words.display() ;
02078     //m_string_words.display() ;
02079 
02080     bool use_svm = false ;
02081 
02082     CDynamicObjectArray PEN((CSGObject**) Plif_matrix, m_N, m_N, false, false) ; // 2d, CPlifBase*
02083     PEN.set_array_name("PEN");
02084 
02085     CDynamicObjectArray PEN_state_signals((CSGObject**) Plif_state_signals, m_N, max_num_signals, false, false) ; // 2d, CPlifBase*
02086     PEN_state_signals.set_array_name("PEN_state_signals");
02087 
02088     CDynamicArray<float64_t> seq_input(seq_array, m_N, m_seq_len, max_num_signals) ;
02089     seq_input.set_array_name("seq_input");
02090 
02091     { // determine whether to use svm outputs and clear derivatives
02092         for (int32_t i=0; i<m_N; i++)
02093             for (int32_t j=0; j<m_N; j++)
02094             {
02095                 CPlifBase* penij=(CPlifBase*) PEN.element(i,j) ;
02096                 if (penij==NULL)
02097                     continue ;
02098 
02099                 if (penij->uses_svm_values())
02100                     use_svm=true ;
02101                 penij->penalty_clear_derivative() ;
02102             }
02103         for (int32_t i=0; i<m_N; i++)
02104             for (int32_t j=0; j<max_num_signals; j++)
02105             {
02106                 CPlifBase* penij=(CPlifBase*) PEN_state_signals.element(i,j) ;
02107                 if (penij==NULL)
02108                     continue ;
02109                 if (penij->uses_svm_values())
02110                     use_svm=true ;
02111                 penij->penalty_clear_derivative() ;
02112             }
02113     }
02114 
02115     { // set derivatives of p, q and a to zero
02116 
02117         for (int32_t i=0; i<m_N; i++)
02118         {
02119             m_initial_state_distribution_p_deriv.element(i)=0 ;
02120             m_end_state_distribution_q_deriv.element(i)=0 ;
02121             for (int32_t j=0; j<m_N; j++)
02122                 m_transition_matrix_a_deriv.element(i,j)=0 ;
02123         }
02124     }
02125 
02126     { // clear score vector
02127         for (int32_t i=0; i<my_seq_len; i++)
02128         {
02129             my_scores[i]=0.0 ;
02130             my_losses[i]=0.0 ;
02131         }
02132     }
02133 
02134     //int32_t total_len = 0 ;
02135 
02136     //m_transition_matrix_a.display_array() ;
02137     //m_transition_matrix_a_id.display_array() ;
02138 
02139     // compute derivatives for given path
02140     float64_t* svm_value = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02141     float64_t* svm_value_part1 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02142     float64_t* svm_value_part2 = SG_MALLOC(float64_t, m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs);
02143     for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02144     {
02145         svm_value[s]=0 ;
02146         svm_value_part1[s]=0 ;
02147         svm_value_part2[s]=0 ;
02148     }
02149 
02150     //#ifdef DYNPROG_DEBUG
02151     float64_t total_score = 0.0 ;
02152     float64_t total_loss = 0.0 ;
02153     //#endif
02154 
02155     ASSERT(my_state_seq[0]>=0) ;
02156     m_initial_state_distribution_p_deriv.element(my_state_seq[0])++ ;
02157     my_scores[0] += m_initial_state_distribution_p.element(my_state_seq[0]) ;
02158 
02159     ASSERT(my_state_seq[my_seq_len-1]>=0) ;
02160     m_end_state_distribution_q_deriv.element(my_state_seq[my_seq_len-1])++ ;
02161     my_scores[my_seq_len-1] += m_end_state_distribution_q.element(my_state_seq[my_seq_len-1]);
02162 
02163     //#ifdef DYNPROG_DEBUG
02164     total_score += my_scores[0] + my_scores[my_seq_len-1] ;
02165     //#endif
02166 
02167     SG_DEBUG( "m_seq_len=%i\n", my_seq_len) ;
02168     for (int32_t i=0; i<my_seq_len-1; i++)
02169     {
02170         if (my_state_seq[i+1]==-1)
02171             break ;
02172         int32_t from_state = my_state_seq[i] ;
02173         int32_t to_state   = my_state_seq[i+1] ;
02174         int32_t from_pos   = my_pos_seq[i] ;
02175         int32_t to_pos     = my_pos_seq[i+1] ;
02176 
02177         int32_t elem_id = m_transition_matrix_a_id.element(from_state, to_state) ;
02178         my_losses[i] = m_seg_loss_obj->get_segment_loss(from_pos, to_pos, elem_id);
02179 
02180 #ifdef DYNPROG_DEBUG
02181 
02182 
02183         if (i>0)// test if segment loss is additive
02184         {
02185             float32_t loss1 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i], elem_id);
02186             float32_t loss2 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i], my_pos_seq[i+1], elem_id);
02187             float32_t loss3 = m_seg_loss_obj->get_segment_loss(my_pos_seq[i-1], my_pos_seq[i+1], elem_id);
02188             SG_PRINT("loss1:%f loss2:%f loss3:%f, diff:%f\n", loss1, loss2, loss3, loss1+loss2-loss3);
02189             if (CMath::abs(loss1+loss2-loss3)>0)
02190             {
02191                 SG_PRINT( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02192             }
02193         }
02194         io->set_loglevel(M_DEBUG) ;
02195         SG_DEBUG( "%i. segment loss %f (id=%i): from=%i(%i), to=%i(%i)\n", i, my_losses[i], elem_id, from_pos, from_state, to_pos, to_state) ;
02196 #endif
02197         // increase usage of this transition
02198         m_transition_matrix_a_deriv.element(from_state, to_state)++ ;
02199         my_scores[i] += m_transition_matrix_a.element(from_state, to_state) ;
02200         //SG_PRINT("m_transition_matrix_a.element(%i, %i),%f \n",from_state, to_state, m_transition_matrix_a.element(from_state, to_state));
02201 #ifdef DYNPROG_DEBUG
02202         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02203 #endif
02204 
02205         /*int32_t last_svm_pos[m_num_degrees] ;
02206           for (int32_t qq=0; qq<m_num_degrees; qq++)
02207           last_svm_pos[qq]=-1 ;*/
02208 
02209         bool is_long_transition = false ;
02210         if (m_long_transitions)
02211         {
02212             if (m_pos[to_pos]-m_pos[from_pos]>m_long_transition_threshold)
02213                 is_long_transition = true ;
02214             if (m_orf_info.element(from_state,0)!=-1)
02215                 is_long_transition = false ;
02216         }
02217 
02218         int32_t from_pos_thresh = from_pos ;
02219         int32_t to_pos_thresh = to_pos ;
02220 
02221         if (use_svm)
02222         {
02223             if (is_long_transition)
02224             {
02225 
02226                 while (from_pos_thresh<to_pos && m_pos[from_pos_thresh+1] - m_pos[from_pos] <= m_long_transition_threshold) // *
02227                     from_pos_thresh++ ;
02228                 ASSERT(from_pos_thresh<to_pos) ;
02229                 ASSERT(m_pos[from_pos_thresh] - m_pos[from_pos] <= m_long_transition_threshold); // *
02230                 ASSERT(m_pos[from_pos_thresh+1] - m_pos[from_pos] > m_long_transition_threshold);// *
02231 
02232                 int32_t frame = m_orf_info.element(from_state,0);
02233                 lookup_content_svm_values(from_pos, from_pos_thresh, m_pos[from_pos], m_pos[from_pos_thresh], svm_value_part1, frame);
02234 
02235 #ifdef DYNPROG_DEBUG
02236                 SG_PRINT("part1: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part1: ", m_pos[from_pos], m_pos[from_pos_thresh], m_pos[from_pos_thresh+1]) ;
02237                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02238                     SG_PRINT("%1.4f  ", svm_value_part1[s]);
02239                 SG_PRINT("\n");
02240 #endif
02241 
02242                 while (to_pos_thresh>0 && m_pos[to_pos] - m_pos[to_pos_thresh-1] <= m_long_transition_threshold) // *
02243                     to_pos_thresh-- ;
02244                 ASSERT(to_pos_thresh>0) ;
02245                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh] <= m_long_transition_threshold) ; // *
02246                 ASSERT(m_pos[to_pos] - m_pos[to_pos_thresh-1] > m_long_transition_threshold) ; // *
02247 
02248                 lookup_content_svm_values(to_pos_thresh, to_pos, m_pos[to_pos_thresh], m_pos[to_pos], svm_value_part2, frame);
02249 
02250 #ifdef DYNPROG_DEBUG
02251                 SG_PRINT("part2: pos1: %i  pos2: %i   pos3: %i  \nsvm_value_part2: ", m_pos[to_pos], m_pos[to_pos_thresh], m_pos[to_pos_thresh+1]) ;
02252                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02253                     SG_PRINT("%1.4f  ", svm_value_part2[s]);
02254                 SG_PRINT("\n");
02255 #endif
02256             }
02257             else
02258             {
02259                 /* normal case */
02260 
02261                 //SG_PRINT("from_pos: %i; to_pos: %i; m_pos[to_pos]-m_pos[from_pos]: %i \n",from_pos, to_pos, m_pos[to_pos]-m_pos[from_pos]);
02262                 int32_t frame = m_orf_info.element(from_state,0);
02263                 if (false)//(frame>=0)
02264                 {
02265                     int32_t num_current_svms=0;
02266                     int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02267                     SG_PRINT("penalties(%i, %i), frame:%i  ", from_state, to_state, frame);
02268                     ((CPlifBase*) PEN.element(to_state, from_state))->get_used_svms(&num_current_svms, svm_ids);
02269                     SG_PRINT("\n");
02270                 }
02271 
02272                 lookup_content_svm_values(from_pos, to_pos, m_pos[from_pos],m_pos[to_pos], svm_value, frame);
02273 #ifdef DYNPROG_DEBUG
02274                 SG_PRINT("part2: pos1: %i  pos2: %i   \nsvm_values: ", m_pos[from_pos], m_pos[to_pos]) ;
02275                 for (int32_t s=0; s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs; s++)
02276                     SG_PRINT("%1.4f  ", svm_value[s]);
02277                 SG_PRINT("\n");
02278 #endif
02279             }
02280         }
02281 
02282         if (PEN.element(to_state, from_state)!=NULL)
02283         {
02284             float64_t nscore = 0 ;
02285             if (is_long_transition)
02286             {
02287                 float64_t pen_value_part1 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1) ;
02288                 float64_t pen_value_part2 = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2) ;
02289                 nscore= 0.5*pen_value_part1 + 0.5*pen_value_part2 ;
02290             }
02291             else
02292                 nscore = ((CPlifBase*) PEN.element(to_state, from_state))->lookup_penalty(m_pos[to_pos]-m_pos[from_pos], svm_value) ;
02293 
02294             if (false)//(nscore<-1e9)
02295                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f\n",
02296                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state, nscore) ;
02297 
02298             my_scores[i] += nscore ;
02299 
02300             for (int32_t s=m_num_svms;s<m_num_lin_feat_plifs_cum[m_num_raw_data]; s++)/*set tiling plif values to neutral values (that do not influence derivative calculation)*/
02301             {
02302                 svm_value[s]=-CMath::INFTY;
02303                 svm_value_part1[s]=-CMath::INFTY;
02304                 svm_value_part2[s]=-CMath::INFTY;
02305             }
02306 
02307 #ifdef DYNPROG_DEBUG
02308             //SG_DEBUG( "%i. transition penalty: from_state=%i to_state=%i from_pos=%i [%i] to_pos=%i [%i] value=%i\n", i, from_state, to_state, from_pos, m_pos[from_pos], to_pos, m_pos[to_pos], m_pos[to_pos]-m_pos[from_pos]) ;
02309 #endif
02310             if (is_long_transition)
02311             {
02312 #ifdef DYNPROG_DEBUG
02313                 float64_t sum_score = 0.0 ;
02314 
02315                 for (int kk=0; kk<i; kk++)
02316                     sum_score += my_scores[i] ;
02317 
02318                 SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), to_pos=%i (%i)=> %1.5f, %1.5f --- 1: %1.6f (%i-%i)  2: %1.6f (%i-%i) \n",
02319                         is_long_transition, m_pos[from_pos], from_state, m_pos[to_pos], to_state,
02320                         nscore, sum_score,
02321                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1)*0.5, m_pos[from_pos], m_pos[from_pos_thresh],
02322                         PEN.element(to_state, from_state)->lookup_penalty(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2)*0.5, m_pos[to_pos_thresh], m_pos[to_pos]) ;
02323 #endif
02324             }
02325 
02326             if (is_long_transition)
02327             {
02328                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[from_pos_thresh]-m_pos[from_pos], svm_value_part1, 0.5) ;
02329                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[to_pos_thresh], svm_value_part2, 0.5) ;
02330             }
02331             else
02332                 ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(m_pos[to_pos]-m_pos[from_pos], svm_value, 1) ;
02333 
02334             //SG_PRINT("m_num_raw_data = %i \n", m_num_raw_data) ;
02335 
02336             // for tiling array and rna-seq data every single measurement must be added to the derivative
02337             // in contrast to the content svm predictions where we have a single value per transition;
02338             // content svm predictions have already been added to the derivative, thus we start with d=1
02339             // instead of d=0
02340             if (is_long_transition)
02341             {
02342                 for (int32_t d=1; d<=m_num_raw_data; d++)
02343                 {
02344                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02345                         svm_value[s]=-CMath::INFTY;
02346                     float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02347                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[from_pos_thresh],intensities, d);
02348                     for (int32_t k=0;k<num_intensities;k++)
02349                     {
02350                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02351                             svm_value[j]=intensities[k];
02352 
02353                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02354 
02355                     }
02356                     num_intensities = raw_intensities_interval_query(m_pos[to_pos_thresh], m_pos[to_pos],intensities, d);
02357                     for (int32_t k=0;k<num_intensities;k++)
02358                     {
02359                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02360                             svm_value[j]=intensities[k];
02361 
02362                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 0.5) ;
02363 
02364                     }
02365                     SG_FREE(intensities);
02366 
02367                 }
02368             }
02369             else
02370             {
02371                 for (int32_t d=1; d<=m_num_raw_data; d++)
02372                 {
02373                     for (int32_t s=0;s<m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;s++)
02374                         svm_value[s]=-CMath::INFTY;
02375                     float64_t* intensities = SG_MALLOC(float64_t, m_num_probes_cum[d]);
02376                     int32_t num_intensities = raw_intensities_interval_query(m_pos[from_pos], m_pos[to_pos],intensities, d);
02377                     //SG_PRINT("m_pos[from_pos]:%i, m_pos[to_pos]:%i, num_intensities:%i\n",m_pos[from_pos],m_pos[to_pos], num_intensities);
02378                     for (int32_t k=0;k<num_intensities;k++)
02379                     {
02380                         for (int32_t j=m_num_lin_feat_plifs_cum[d-1];j<m_num_lin_feat_plifs_cum[d];j++)
02381                             svm_value[j]=intensities[k];
02382 
02383                         ((CPlifBase*) PEN.element(to_state, from_state))->penalty_add_derivative(-CMath::INFTY, svm_value, 1) ;
02384 
02385                     }
02386                     SG_FREE(intensities);
02387                 }
02388             }
02389 
02390         }
02391 #ifdef DYNPROG_DEBUG
02392         SG_DEBUG( "%i. scores[i]=%f\n", i, my_scores[i]) ;
02393 #endif
02394 
02395         //SG_DEBUG( "emmission penalty skipped: to_state=%i to_pos=%i value=%1.2f score=%1.2f\n", to_state, to_pos, seq_input.element(to_state, to_pos), 0.0) ;
02396         for (int32_t k=0; k<max_num_signals; k++)
02397         {
02398             if ((PEN_state_signals.element(to_state,k)==NULL)&&(k==0))
02399             {
02400 #ifdef DYNPROG_DEBUG
02401                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i score=%1.2f (no signal plif)\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k)) ;
02402 #endif
02403                 my_scores[i] += seq_input.element(to_state, to_pos, k) ;
02404                 //if (seq_input.element(to_state, to_pos, k) !=0)
02405                 //  SG_PRINT("features(%i,%i): %f\n",to_state,to_pos,seq_input.element(to_state, to_pos, k));
02406                 break ;
02407             }
02408             if (PEN_state_signals.element(to_state, k)!=NULL)
02409             {
02410                 float64_t nscore = ((CPlifBase*) PEN_state_signals.element(to_state,k))->lookup_penalty(seq_input.element(to_state, to_pos, k), svm_value) ; // this should be ok for long_transitions (svm_value does not matter)
02411                 my_scores[i] += nscore ;
02412 #ifdef DYNPROG_DEBUG
02413                 if (false)//(nscore<-1e9)
02414                 {
02415                     SG_PRINT("is_long_transition=%i  (from_pos=%i (%i), from_state=%i, to_pos=%i (%i) to_state=%i=> %1.5f, dim3:%i, seq_input.element(to_state, to_pos, k): %1.4f\n",
02416                         is_long_transition, m_pos[from_pos], from_pos, from_state, m_pos[to_pos], to_pos, to_state, nscore, k, seq_input.element(to_state, to_pos, k)) ;
02417                     for (int x=0; x<23; x++)
02418                     {
02419                         for (int i=-10; i<10; i++)
02420                             SG_PRINT("%1.4f\t", seq_input.element(x, to_pos+i, k));
02421                         SG_PRINT("\n");
02422                     }
02423 
02424                 }
02425 #endif
02426                 //break ;
02427                 //int32_t num_current_svms=0;
02428                 //int32_t svm_ids[] = {-8, -7, -6, -5, -4, -3, -2, -1};
02429                 //SG_PRINT("PEN_state_signals->id: ");
02430                 //PEN_state_signals.element(to_state, k)->get_used_svms(&num_current_svms, svm_ids);
02431                 //SG_PRINT("\n");
02432                 //if (nscore != 0)
02433                 //SG_PRINT( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02434 #ifdef DYNPROG_DEBUG
02435                 SG_DEBUG( "%i. emmission penalty: to_state=%i to_pos=%i value=%1.2f score=%1.2f k=%i\n", i, to_state, to_pos, seq_input.element(to_state, to_pos, k), nscore, k) ;
02436 #endif
02437                 ((CPlifBase*) PEN_state_signals.element(to_state,k))->penalty_add_derivative(seq_input.element(to_state, to_pos, k), svm_value, 1) ; // this should be ok for long_transitions (svm_value does not matter)
02438             } else
02439                 break ;
02440         }
02441 
02442         //#ifdef DYNPROG_DEBUG
02443         //SG_PRINT( "scores[%i]=%f (final) \n", i, my_scores[i]) ;
02444         //SG_PRINT( "losses[%i]=%f (final) , total_loss: %f \n", i, my_losses[i], total_loss) ;
02445         total_score += my_scores[i] ;
02446         total_loss += my_losses[i] ;
02447         //#endif
02448     }
02449     //#ifdef DYNPROG_DEBUG
02450     //SG_PRINT( "total score = %f \n", total_score) ;
02451     //SG_PRINT( "total loss = %f \n", total_loss) ;
02452     //#endif
02453     SG_FREE(svm_value);
02454     SG_FREE(svm_value_part1);
02455     SG_FREE(svm_value_part2);
02456 }
02457 
02458 int32_t CDynProg::raw_intensities_interval_query(const int32_t from_pos, const int32_t to_pos, float64_t* intensities, int32_t type)
02459 {
02460     ASSERT(from_pos<to_pos);
02461     int32_t num_intensities = 0;
02462     int32_t* p_tiling_pos  = &m_probe_pos[m_num_probes_cum[type-1]];
02463     float64_t* p_tiling_data = &m_raw_intensities[m_num_probes_cum[type-1]];
02464     int32_t last_pos;
02465     int32_t num = m_num_probes_cum[type-1];
02466     while (*p_tiling_pos<to_pos)
02467     {
02468         if (*p_tiling_pos>=from_pos)
02469         {
02470             intensities[num_intensities] = *p_tiling_data;
02471             num_intensities++;
02472         }
02473         num++;
02474         if (num>=m_num_probes_cum[type])
02475             break;
02476         last_pos = *p_tiling_pos;
02477         p_tiling_pos++;
02478         p_tiling_data++;
02479         ASSERT(last_pos<*p_tiling_pos);
02480     }
02481     return num_intensities;
02482 }
02483 
02484 void CDynProg::lookup_content_svm_values(const int32_t from_state, const int32_t to_state, const int32_t from_pos, const int32_t to_pos, float64_t* svm_values, int32_t frame)
02485 {
02486 #ifdef DYNPROG_TIMING_DETAIL
02487     MyTime.start() ;
02488 #endif
02489 //  ASSERT(from_state<to_state);
02490 //  if (!(from_pos<to_pos))
02491 //      SG_ERROR("from_pos!<to_pos, from_pos: %i to_pos: %i \n",from_pos,to_pos);
02492     for (int32_t i=0;i<m_num_svms;i++)
02493     {
02494         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02495         float64_t from_val = m_lin_feat.get_element(i, from_state);
02496         svm_values[i] = (to_val-from_val)/(to_pos-from_pos);
02497     }
02498     for (int32_t i=m_num_svms;i<m_num_lin_feat_plifs_cum[m_num_raw_data];i++)
02499     {
02500         float64_t to_val   = m_lin_feat.get_element(i, to_state);
02501         float64_t from_val = m_lin_feat.get_element(i, from_state);
02502         svm_values[i] = to_val-from_val ;
02503     }
02504     if (m_intron_list)
02505     {
02506         int32_t* support = SG_MALLOC(int32_t, m_num_intron_plifs);
02507         m_intron_list->get_intron_support(support, from_state, to_state);
02508         int32_t intron_list_start = m_num_lin_feat_plifs_cum[m_num_raw_data];
02509         int32_t intron_list_end = m_num_lin_feat_plifs_cum[m_num_raw_data]+m_num_intron_plifs;
02510         int32_t cnt = 0;
02511         for (int32_t i=intron_list_start; i<intron_list_end;i++)
02512         {
02513             svm_values[i] = (float64_t) (support[cnt]);
02514             cnt++;
02515         }
02516         //if (to_pos>3990 && to_pos<4010)
02517         //  SG_PRINT("from_state:%i to_state:%i support[0]:%i support[1]:%i\n",from_state, to_state, support[0], support[1]);
02518         SG_FREE(support);
02519     }
02520     // find the correct row with precomputed frame predictions
02521     if (frame!=-1)
02522     {
02523         svm_values[frame_plifs[0]] = 1e10;
02524         svm_values[frame_plifs[1]] = 1e10;
02525         svm_values[frame_plifs[2]] = 1e10;
02526         int32_t global_frame = from_pos%3;
02527         int32_t row = ((global_frame+frame)%3)+4;
02528         float64_t to_val   = m_lin_feat.get_element(row, to_state);
02529         float64_t from_val = m_lin_feat.get_element(row, from_state);
02530         svm_values[frame+frame_plifs[0]] = (to_val-from_val)/(to_pos-from_pos);
02531     }
02532 #ifdef DYNPROG_TIMING_DETAIL
02533     MyTime.stop() ;
02534     content_svm_values_time += MyTime.time_diff_sec() ;
02535 #endif
02536 }
02537 void CDynProg::set_intron_list(CIntronList* intron_list, int32_t num_plifs)
02538 {
02539     m_intron_list = intron_list;
02540     m_num_intron_plifs = num_plifs;
02541 }
02542 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation