SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
IntronList.cpp
Go to the documentation of this file.
1 
2 #include <stdio.h>
3 #include <string.h>
4 
6 #include <shogun/lib/config.h>
7 #include <shogun/io/SGIO.h>
9 
10 using namespace shogun;
11 
// Initializer list and body of a constructor. The line carrying the
// constructor's name is not part of this listing
// (NOTE(review): presumably CIntronList::CIntronList() - confirm against the original file).
// Invokes CSGObject() and leaves the object empty: m_length is zero and the
// position, intron and quality arrays are all NULL.
13 :CSGObject()
14 {
15  m_length = 0;
16  m_all_pos = NULL;
17  m_intron_list = NULL;
18  m_quality_list = NULL;
19 }
// Body of a definition whose signature line is missing from this listing
// (NOTE(review): looks like the CIntronList destructor - confirm against the original file).
// Frees the per-position intron and quality arrays for every index below
// m_length, then the two outer pointer arrays and the position array.
21 {
22  for (int i=0; i<m_length; i++)
23  {
24  SG_FREE(m_intron_list[i]);
25  SG_FREE(m_quality_list[i]);
26  }
27  SG_FREE(m_intron_list);
28  SG_FREE(m_quality_list);
29  SG_FREE(m_all_pos);
30 }
31 void CIntronList::init_list(int32_t* all_pos, int32_t len)
32 {
33  m_length = len;
34  m_all_pos = SG_MALLOC(int32_t, len);
35  memcpy(m_all_pos, all_pos, len*sizeof(int32_t));
36  m_intron_list = SG_MALLOC(int32_t*, len);
37  m_quality_list = SG_MALLOC(int32_t*, len);
38 
39  //initialize all elements with an array of length one
40  int32_t* one;
41  for (int i=0;i<m_length;i++)
42  {
43  one = SG_MALLOC(int32_t, 1);//use malloc here because mem can be increased efficiently with realloc later
44  m_intron_list[i] = one;
45  m_intron_list[i][0] = 1;
46  one = SG_MALLOC(int32_t, 1);
47  m_quality_list[i] = one;
48  m_quality_list[i][0] = 1;
49  }
50 }
51 void CIntronList::read_introns(int32_t* start_pos, int32_t* end_pos, int32_t* quality, int32_t len)
52 {
53  int k=0;
54  for(int i=0;i<m_length;i++)//iterate over candidate positions
55  {
56  while (k<len)
57  {
58  //SG_PRINT("i:%i, m_all_pos[i]:%i, k:%i, end_pos[k]: %i\n", i, m_all_pos[i], k, end_pos[k])
59  if (k>0)
60  if (end_pos[k]<end_pos[k-1])
61  SG_ERROR("end pos array is not sorted: end_pos[k-1]:%i end_pos[k]:%i\n", end_pos[k-1], end_pos[k])
62  if (end_pos[k]>=m_all_pos[i])
63  break;
64  else
65  k++;
66 
67  }
68  while (k<len && end_pos[k]==m_all_pos[i])
69  {
70  //SG_PRINT("\tk:%i, end_pos[k]: %i, start_pos[k]:%i\n", k, end_pos[k], start_pos[k])
71  ASSERT(start_pos[k]<end_pos[k])
72  ASSERT(end_pos[k]<=m_all_pos[m_length-1])
73  // intron list
74  //------------
75  int32_t from_list_len = m_intron_list[i][0];
76  int32_t* new_list = SG_REALLOC(int32_t, m_intron_list[i], from_list_len, (from_list_len+1));
77  if (new_list == NULL)
78  SG_ERROR("IntronList: Out of mem 4")
79  new_list[from_list_len]= start_pos[k];
80  new_list[0]++;
81  m_intron_list[i] = new_list;
82  // quality list
83  //--------------
84  int32_t q_list_len = m_quality_list[i][0];
85  //SG_PRINT("\t q_list_len:%i, from_list_len:%i \n",q_list_len, from_list_len)
86  ASSERT(q_list_len==from_list_len)
87  new_list = SG_REALLOC(int32_t, m_quality_list[i], q_list_len, (q_list_len+1));
88  if (new_list == NULL)
89  SG_ERROR("IntronList: Out of mem 5")
90  new_list[q_list_len]= quality[k];
91  new_list[0]++;
92  m_quality_list[i] = new_list;
93 
94  k++;
95  }
96  }
97 }
102 void CIntronList::get_intron_support(int32_t* values, int32_t from_pos, int32_t to_pos)
103 {
104  if (from_pos>=m_length)
105  SG_ERROR("from_pos (%i) is not < m_length (%i)\n",to_pos, m_length)
106  if (to_pos>=m_length)
107  SG_ERROR("to_pos (%i) is not < m_length (%i)\n",to_pos, m_length)
108  int32_t* from_list = m_intron_list[to_pos];
109  int32_t* q_list = m_quality_list[to_pos];
110 
111  //SG_PRINT("from_list[0]: %i\n", from_list[0])
112 
113  int32_t coverage = 0;
114  int32_t quality = 0;
115  for (int i=1;i<from_list[0]; i++)
116  {
117  //SG_PRINT("from_list[%i]: %i, m_all_pos[from_pos]:%i\n", i, from_list[i], m_all_pos[from_pos])
118  if (from_list[i]==m_all_pos[from_pos])
119  {
120  //SG_PRINT("found intron: %i->%i\n", from_pos, to_pos )
121  coverage = coverage+1;
122  quality = CMath::max(quality, q_list[i]);
123  }
124  }
125  values[0] = coverage;
126  values[1] = quality;
127 }

SHOGUN Machine Learning Toolbox - Documentation