22 using namespace shogun;
65 getExpFunctionCache(max_len);
// Parameter list and body excerpts of the k-mer encoder (the line carrying
// the function name is not part of this excerpt; presumably encodeOligo,
// judging by the matching four-argument call later in this file — confirm).
//
// For every window of k_mer_length characters in `sequence`, one
// (position, value) pair is written to `values`:
//   - position is 1-based (1 for the first window, j + 1 for window j);
//   - value is the window read as a number in base
//     allowed_characters.size(), with the window's first character as the
//     most significant digit.
// The pairs are finally stable-sorted with cmpOligos_.
//
// sequence            text to encode
// k_mer_length        window length
// allowed_characters  alphabet; its size is the numeric base
// values              output, resized to sequence_length - k_mer_length + 1
70 const std::string& sequence, uint32_t k_mer_length,
71 const std::string& allowed_characters,
72 std::vector< std::pair<int32_t, float64_t> >& values)
76 std::map<std::string::value_type, uint32_t> residue_values;
78 uint32_t number_of_residues = allowed_characters.size();
79 uint32_t sequence_length = sequence.size();
80 bool sequence_ok =
true;
// Scan every character of the sequence for membership in the alphabet
// (the statement executed on a miss is on a line not shown here).
83 for (uint32_t i = 0; i < sequence.size(); ++i)
85 if (allowed_characters.find(sequence.at(i)) == std::string::npos)
// Encode only when the sequence passed the check and at least one window
// fits. This guard also keeps the unsigned expression
// sequence_length - k_mer_length + 1 below from wrapping around.
89 if (sequence_ok && k_mer_length <= sequence_length)
91 values.resize(sequence_length - k_mer_length + 1,
92 std::pair<int32_t, float64_t>());
// Map each alphabet character to the current value of `counter`
// (presumably its index; the counter updates are on lines not shown — confirm).
93 for (uint32_t i = 0; i < number_of_residues; ++i)
95 residue_values.insert(std::make_pair(allowed_characters[i], counter));
// First window: accumulate digits from the last character (weight 1) to the
// first (highest weight), multiplying the weight by the base at each step.
// k is signed so that the k >= 0 test can terminate the countdown.
98 for (int32_t
k = k_mer_length - 1;
k >= 0;
k--)
100 oligo_value += factor * residue_values[sequence[
k]];
101 factor *= number_of_residues;
// The loop multiplied once more than needed; dividing back leaves `factor`
// equal to the weight of a window's leading character.
103 factor /= number_of_residues;
105 values[counter].first = 1;
106 values[counter].second = oligo_value;
// Remaining windows, rolling update: drop the outgoing leading digit, shift
// the rest up by one digit, then add the incoming trailing digit.
109 for (uint32_t j = 1; j < sequence_length - k_mer_length + 1; j++)
111 oligo_value -= factor * residue_values[sequence[j - 1]];
112 oligo_value = oligo_value * number_of_residues +
113 residue_values[sequence[j + k_mer_length - 1]];
115 values[counter].first = j + 1;
116 values[counter].second = oligo_value ;
// cmpOligos_ is defined outside this excerpt; its ordering criterion is not
// visible here. stable_sort keeps equal elements in their insertion order.
119 stable_sort(values.begin(), values.end(), cmpOligos_);
// Parameter list and body excerpts of the batch encoder (the line carrying
// the function name is not part of this excerpt).
//
// Resizes `encoded_sequences` to one entry per input sequence, then encodes
// each sequence with encodeOligo into a scratch vector and copies the result
// into the matching slot.
//
// sequences           input strings, encoded independently
// k_mer_length        forwarded unchanged to encodeOligo
// allowed_characters  forwarded unchanged to encodeOligo
// encoded_sequences   output; entry i holds the encoding of sequences[i]
128 const std::vector<std::string>& sequences, uint32_t k_mer_length,
129 const std::string& allowed_characters,
130 std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences)
// Scratch buffer reused for every sequence; encodeOligo receives it by
// reference as its output argument.
132 std::vector< std::pair<int32_t, float64_t> > temp_vector;
133 encoded_sequences.resize(sequences.size(),
134 std::vector< std::pair<int32_t, float64_t> >());
136 for (uint32_t i = 0; i < sequences.size(); ++i)
138 encodeOligo(sequences[i], k_mer_length, allowed_characters, temp_vector);
// Copy (not move) the scratch buffer into slot i.
139 encoded_sequences[i] = temp_vector;
// Iterates i over 1 .. sequence_length - 1; the loop body and any setup
// before the loop are on lines not shown in this excerpt.
// NOTE(review): presumably this fills the gauss_table lookup that is indexed
// by absolute position difference elsewhere in this file — confirm that the
// table ends up with an entry for every difference that can be looked up.
143 void COligoStringKernel::getExpFunctionCache(uint32_t sequence_length)
149 for (uint32_t i = 1; i < sequence_length; i++)
// Parameter list and body excerpts of the kernel evaluation over two encoded
// sequences (the line carrying the function name is not part of this excerpt).
//
// x, y          vectors of (position, value) pairs; the traversal below
//               compares .second values with == and <, so it presumably
//               expects both inputs ordered by value — confirm against the
//               comparator used when the encodings are sorted
// max_distance  upper bound on |x position - y position| for a pair to
//               contribute (the left operand of the || on original line 172
//               is not visible here)
156 const std::vector< std::pair<int32_t, float64_t> >& x,
157 const std::vector< std::pair<int32_t, float64_t> >& y,
158 int32_t max_distance)
164 uint32_t x_size = x.size();
165 uint32_t y_size = y.size();
// Continue only while both cursors still have a successor element; the
// x[i1 + 1] / y[i2 + 1] look-aheads below rely on this.
167 while ((uint32_t) i1 + 1 < x_size && (uint32_t) i2 + 1 < y_size)
// Same k-mer value on both sides.
// NOTE(review): exact == on float64_t — confirm the encoded values stay
// integer-valued and exactly representable for the alphabets/k in use.
169 if (x[i1].second == y[i2].second)
172 || (abs(x[i1].first - y[i2].first)) <= max_distance)
// Add the cached weight for this absolute position difference.
// NOTE(review): no bounds check on the gauss_table index is visible here.
174 result +=
gauss_table[abs((x[i1].first - y[i2].first))];
// Look ahead for a run of equal values on either side (the actions taken in
// these branches are on lines not shown).
175 if (x[i1].second == x[i1 + 1].second)
180 else if (y[i2].second == y[i2 + 1].second)
194 if (x[i1].first < y[i2].first)
196 if (x[i1].second == x[i1 + 1].second)
200 else if (y[i2].second == y[i2 + 1].second)
// NOTE(review): this condition reads y[i2 + 1] with no bound on i2 visible on
// this line — confirm the unseen surrounding lines keep i2 + 1 < y_size.
202 while (y[i2].second == y[i2+1].second)
// Values differ: branch on which side holds the smaller value (the cursor
// updates themselves are on lines not shown).
224 if (x[i1].second < y[i2].second)
241 std::vector< std::pair<int32_t, float64_t> > aenc;
242 std::vector< std::pair<int32_t, float64_t> > benc;
// Only the header and the tail of one call are visible in this excerpt: the
// call's last argument is the description string "Gauss Cache Table.".
// NOTE(review): presumably a parameter-registration call for the gauss
// cache table — confirm against the full body.
251 void COligoStringKernel::init()
263 "Gauss Cache Table.");