23 using namespace shogun;
77 getExpFunctionCache(max_len);
// NOTE(review): this listing omits the function header, the braces and several
// statements (the embedded line numbers skip), so the notes below describe only
// what the visible lines establish.
//
// Parameter list of a routine that turns one sequence into a vector of
// (int32_t, float64_t) pairs written to `values`. The argument types match the
// encodeOligo(...) call made further down in this file, so this is presumably
// that function -- confirm against the full source.
//   sequence           - input string to encode
//   k_mer_length       - length of each window (k-mer)
//   allowed_characters - alphabet; its size is used as the numeric base below
//   values             - output, one entry per k-mer window
82 const std::string& sequence, uint32_t k_mer_length,
83 const std::string& allowed_characters,
84 std::vector< std::pair<int32_t, float64_t> >& values)
// Maps each allowed character to a numeric code (filled in below).
88 std::map<std::string::value_type, uint32_t> residue_values;
90 uint32_t number_of_residues = allowed_characters.size();
91 uint32_t sequence_length = sequence.size();
92 bool sequence_ok =
true;
// Validate: every character of the sequence is looked up in the alphabet.
// The action taken on a miss is not visible here (presumably it clears
// sequence_ok -- confirm).
95 for (uint32_t i = 0; i < sequence.size(); ++i)
97 if (allowed_characters.find(sequence.at(i)) == std::string::npos)
// Both operands are unsigned, so the k_mer_length <= sequence_length guard is
// what keeps `sequence_length - k_mer_length + 1` below from wrapping around.
101 if (sequence_ok && k_mer_length <= sequence_length)
// One output slot per window position.
103 values.resize(sequence_length - k_mer_length + 1,
104 std::pair<int32_t, float64_t>());
// Assign a code to each alphabet character. How `counter` is initialised and
// advanced is not visible in this listing.
105 for (uint32_t i = 0; i < number_of_residues; ++i)
107 residue_values.insert(std::make_pair(allowed_characters[i], counter));
// Encode the first k-mer as a number in base number_of_residues: walk from its
// last character to its first, weighting each character code by `factor`, which
// grows by one power of the base per step. Initial values of oligo_value and
// factor are set on lines not shown here.
110 for (int32_t
k = k_mer_length - 1;
k >= 0;
k--)
112 oligo_value += factor * residue_values[sequence[
k]];
113 factor *= number_of_residues;
// The loop multiplied once more than needed; step back so `factor` is the
// weight that was applied to the leading character of a k-mer.
115 factor /= number_of_residues;
// First window: .first is set to 1, .second to the encoded value.
117 values[counter].first = 1;
118 values[counter].second = oligo_value;
// Remaining windows use a rolling update instead of re-encoding from scratch:
// drop the contribution of the character leaving the window, shift by one base
// digit, then add the code of the character entering the window.
121 for (uint32_t j = 1; j < sequence_length - k_mer_length + 1; j++)
123 oligo_value -= factor * residue_values[sequence[j - 1]];
124 oligo_value = oligo_value * number_of_residues +
125 residue_values[sequence[j + k_mer_length - 1]];
// .first is j + 1 for the window starting at index j.
127 values[counter].first = j + 1;
128 values[counter].second = oligo_value ;
// Order the entries with cmpOligos_ (defined outside this listing); stable_sort
// keeps entries that compare equal in their original relative order.
131 stable_sort(values.begin(), values.end(), cmpOligos_);
// NOTE(review): the function header and braces are not part of this listing.
//
// Parameter list of a batch routine: each string in `sequences` is encoded with
// the same k_mer_length and allowed_characters, and the result for sequences[i]
// is stored in encoded_sequences[i].
140 const std::vector<std::string>& sequences, uint32_t k_mer_length,
141 const std::string& allowed_characters,
142 std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences)
// Scratch buffer handed to encodeOligo on every iteration.
144 std::vector< std::pair<int32_t, float64_t> > temp_vector;
// Size the output to one (initially empty) vector per input sequence.
145 encoded_sequences.resize(sequences.size(),
146 std::vector< std::pair<int32_t, float64_t> >());
148 for (uint32_t i = 0; i < sequences.size(); ++i)
// NOTE(review): temp_vector is reused across iterations; whether encodeOligo
// resets it when a sequence is rejected is not visible here -- confirm, since
// otherwise a previous result could be copied again below.
150 encodeOligo(sequences[i], k_mer_length, allowed_characters, temp_vector);
// Copies the scratch buffer into the output slot.
151 encoded_sequences[i] = temp_vector;
// Takes a sequence length and iterates over the indices 1 .. sequence_length - 1.
// NOTE(review): the loop body and any handling of index 0 are on lines missing
// from this listing; judging by the name this presumably fills a lookup table of
// precomputed exponential values -- confirm against the full source.
155 void COligoStringKernel::getExpFunctionCache(uint32_t sequence_length)
160 for (uint32_t i = 1; i < sequence_length; i++)
// NOTE(review): the function header, braces, local declarations (result, i1,
// i2, ...) and most branch bodies are missing from this listing.
//
// Parameter list of a routine that walks two encoded sequences x and y in
// parallel and accumulates gauss_table entries for entries whose .second values
// match. The `<` comparison on .second near the end suggests both inputs are
// expected to be ordered by .second -- confirm with the caller.
//   x, y         - vectors of (int32_t, float64_t) pairs
//   max_distance - upper bound on |x.first - y.first| for a match to count
165 const std::vector< std::pair<int32_t, float64_t> >& x,
166 const std::vector< std::pair<int32_t, float64_t> >& y,
167 int32_t max_distance)
173 uint32_t x_size = x.size();
174 uint32_t y_size = y.size();
// The loop stops while one element still remains on each side, which keeps the
// [i1 + 1] / [i2 + 1] look-aheads below inside the vectors.
176 while ((uint32_t) i1 + 1 < x_size && (uint32_t) i2 + 1 < y_size)
// Same encoded value on both sides.
178 if (x[i1].second == y[i2].second)
// Second half of the distance test; the first operand of `||` is on a line not
// shown in this listing.
181 || (abs(x[i1].first - y[i2].first)) <= max_distance)
// Contribution is looked up by the absolute difference of the .first fields.
183 result += gauss_table[abs((x[i1].first - y[i2].first))];
// Look ahead for a run of equal values, first in x, then in y.
184 if (x[i1].second == x[i1 + 1].second)
189 else if (y[i2].second == y[i2 + 1].second)
// Reached when the values match but the distance test above failed.
203 if (x[i1].first < y[i2].first)
205 if (x[i1].second == x[i1 + 1].second)
209 else if (y[i2].second == y[i2 + 1].second)
// NOTE(review): this compares y[i2].second with itself, so the condition is
// true for every non-NaN value. The surrounding tests compare against
// y[i2 + 1]; this looks like a typo for y[i2 + 1].second. The loop body is not
// visible here, so verify termination and bounds before changing it.
211 while (y[i2].second == y[i2].second)
// Values differ: branch on which side holds the smaller one.
233 if (x[i1].second < y[i2].second)
// NOTE(review): the function header, braces, local declarations and the branch
// bodies are missing from this listing.
//
// Parameter list of a routine that walks two encoded sequences x and y in
// parallel, comparing their .second values. Unlike the max_distance overload
// earlier in the file, it takes no distance limit.
244 const std::vector< std::pair<int32_t, float64_t> >& x,
245 const std::vector< std::pair<int32_t, float64_t> >& y)
251 uint32_t x_size = x.size();
252 uint32_t y_size = y.size();
// Runs until either side is exhausted, so the last elements are visited too.
254 while ((uint32_t) i1 < x_size && (uint32_t) i2 < y_size)
// Same encoded value on both sides.
256 if (x[i1].second == y[i2].second)
// Each look-ahead is bounds-checked first because the loop condition alone does
// not guarantee that i1 + 1 / i2 + 1 are valid indices.
260 if (((uint32_t) i1+1) < x_size && x[i1].second == x[i1 + 1].second)
265 else if (((uint32_t) i2+1) <y_size && y[i2].second == y[i2 + 1].second)
// Values differ: branch on which side holds the smaller one.
279 if (x[i1].second < y[i2].second)
296 std::vector< std::pair<int32_t, float64_t> > aenc;
297 std::vector< std::pair<int32_t, float64_t> > benc;
307 void COligoStringKernel::init()