IntronList.cpp

Go to the documentation of this file.
00001 
00002 #include <stdio.h>
00003 #include <string.h>
00004 
00005 #include "lib/Mathematics.h"
00006 #include "lib/config.h"
00007 #include "lib/io.h"
00008 #include "structure/IntronList.h"
00009 
00010 using namespace shogun;
00011 
00012 CIntronList::CIntronList()
00013 :CSGObject()
00014 {
00015     m_length = 0;
00016     m_all_pos = NULL;
00017     m_intron_list = NULL;
00018     m_quality_list = NULL;
00019 }
00020 CIntronList::~CIntronList()
00021 {
00022     for (int i=0; i<m_length; i++)
00023     {
00024         free(m_intron_list[i]);
00025         free(m_quality_list[i]);
00026     }
00027     delete[] m_intron_list;
00028     delete[] m_quality_list;
00029     delete[] m_all_pos;
00030 }
00031 void CIntronList::init_list(int32_t* all_pos, int32_t len)
00032 {
00033     m_length = len;
00034     m_all_pos = new int32_t[len];
00035     memcpy(m_all_pos, all_pos, len*sizeof(int32_t));
00036     m_intron_list = new int32_t*[len];
00037     m_quality_list = new int32_t*[len];
00038     if (m_intron_list==NULL||m_quality_list==NULL)
00039         SG_ERROR("IntronList: Out of mem 1");
00040     //initialize all elements with an array of length one
00041     int32_t* one;
00042     for (int i=0;i<m_length;i++)
00043     {
00044         one = (int32_t*) malloc(sizeof(int32_t));//use malloc here because mem can be increased efficiently with realloc later
00045         if (one==NULL)
00046             SG_ERROR("IntronList: Out of mem 2");
00047         m_intron_list[i] = one;
00048         m_intron_list[i][0] = 1;
00049         one = (int32_t*) malloc(sizeof(int32_t));
00050         if (one==NULL)
00051             SG_ERROR("IntronList: Out of mem 3");
00052         m_quality_list[i] = one;
00053         m_quality_list[i][0] = 1;
00054     }
00055 }
00056 void CIntronList::read_introns(int32_t* start_pos, int32_t* end_pos, int32_t* quality, int32_t len)
00057 {
00058     int k=0;
00059     for(int i=0;i<m_length;i++)//iterate over candidate positions
00060     {
00061         while (k<len)
00062         {
00063             //SG_PRINT("i:%i, m_all_pos[i]:%i, k:%i, end_pos[k]: %i\n", i, m_all_pos[i], k, end_pos[k]) ;
00064             if (k>0)
00065                 if (end_pos[k]<end_pos[k-1])
00066                     SG_ERROR("end pos array is not sorted: end_pos[k-1]:%i end_pos[k]:%i\n", end_pos[k-1], end_pos[k]);
00067             if (end_pos[k]>=m_all_pos[i])
00068                 break;
00069             else
00070                 k++;
00071                 
00072         }
00073         while (k<len && end_pos[k]==m_all_pos[i])
00074         {
00075             //SG_PRINT("\tk:%i, end_pos[k]: %i, start_pos[k]:%i\n", k, end_pos[k], start_pos[k]) ;
00076             ASSERT(start_pos[k]<end_pos[k]);
00077             ASSERT(end_pos[k]<=m_all_pos[m_length-1]);
00078             // intron list
00079             //------------
00080             int32_t from_list_len = m_intron_list[i][0];
00081             int32_t* new_list = (int32_t*) realloc(m_intron_list[i], (from_list_len+1)*sizeof(int32_t));
00082             if (new_list == NULL)
00083                 SG_ERROR("IntronList: Out of mem 4");
00084             new_list[from_list_len]= start_pos[k];
00085             new_list[0]++;
00086             m_intron_list[i] = new_list;
00087             // quality list
00088             //--------------
00089             int32_t q_list_len = m_quality_list[i][0];
00090             //SG_PRINT("\t q_list_len:%i, from_list_len:%i \n",q_list_len, from_list_len);
00091             ASSERT(q_list_len==from_list_len);
00092             new_list = (int32_t*) realloc(m_quality_list[i], (q_list_len+1)*sizeof(int32_t));
00093             if (new_list == NULL)
00094                 SG_ERROR("IntronList: Out of mem 5");
00095             new_list[q_list_len]= quality[k];
00096             new_list[0]++;
00097             m_quality_list[i] = new_list;
00098 
00099             k++;
00100         }           
00101     }
00102 }
00107 void CIntronList::get_intron_support(int32_t* values, int32_t from_pos, int32_t to_pos)
00108 {
00109     if (from_pos>=m_length)
00110         SG_ERROR("from_pos (%i) is not < m_length (%i)\n",to_pos, m_length);
00111     if (to_pos>=m_length)
00112         SG_ERROR("to_pos (%i) is not < m_length (%i)\n",to_pos, m_length);
00113     int32_t* from_list = m_intron_list[to_pos];
00114     int32_t* q_list = m_quality_list[to_pos];
00115 
00116     //SG_PRINT("from_list[0]: %i\n", from_list[0]); 
00117 
00118     int32_t coverage = 0;
00119     int32_t quality = 0;
00120     for (int i=1;i<from_list[0]; i++)
00121     {
00122         //SG_PRINT("from_list[%i]: %i, m_all_pos[from_pos]:%i\n", i,  from_list[i], m_all_pos[from_pos]);
00123         if (from_list[i]==m_all_pos[from_pos])
00124         {
00125             //SG_PRINT("found intron: %i->%i\n", from_pos, to_pos );
00126             coverage = coverage+1;
00127             quality = CMath::max(quality, q_list[i]); 
00128         }
00129     }   
00130     values[0] = coverage;
00131     values[1] = quality;
00132 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation