34 using namespace shogun;
44 float64_t* AA_matrix_, int32_t nr, int32_t nc,
46 alphabet(NULL), degree(degree_), max_mismatch(max_mismatch_), width(width_)
60 :
CStringKernel<char>(size), alphabet(NULL), degree(degree_), max_mismatch(max_mismatch_), width(width_)
85 int32_t lhs_changed=(
lhs!=l);
86 int32_t rhs_changed=(
rhs!=r);
90 SG_DEBUG(
"lhs_changed: %i\n", lhs_changed);
91 SG_DEBUG(
"rhs_changed: %i\n", rhs_changed);
124 for (
unsigned int i=0; i<path.size(); i++)
126 if (path[i]!=joint_seq[index+i])
128 diff +=
AA_matrix[ (path[i]-1)*128 + path[i] - 1] ;
129 diff -= 2*
AA_matrix[ (path[i]-1)*128 + joint_seq[index+i] - 1] ;
130 diff +=
AA_matrix[ (joint_seq[index+i]-1)*128 + joint_seq[index+i] - 1] ;
134 return exp( - diff/
width) ;
228 std::vector<struct joint_list_struct> &joint_list,
229 std::string path,
unsigned int d)
231 const char* AA =
"ACDEFGHIKLMNPQRSTVWY" ;
232 const unsigned int num_AA = strlen(AA) ;
234 assert(path.size()==d) ;
236 for (
unsigned int i=0; i<num_AA; i++)
238 std::vector<struct joint_list_struct> joint_list_ ;
241 fprintf(stderr,
"i=%i: ", i) ;
247 fprintf(stdout,
"*") ;
252 fprintf(stdout,
"+") ;
256 for (
unsigned int j=0; j<joint_list.size(); j++)
258 if (joint_seq[joint_list[j].index+d] != AA[i])
260 if (joint_list[j].mismatch+1 <= (
unsigned int)
max_mismatch)
262 struct joint_list_struct list_item ;
263 list_item = joint_list[j] ;
264 list_item.mismatch = joint_list[j].mismatch+1 ;
265 joint_list_.push_back(list_item) ;
269 joint_list_.push_back(joint_list[j]) ;
272 if (joint_list_.size()>0)
274 std::string path_ = path + AA[i] ;
276 if (d+1 < (
unsigned int)
degree)
286 for (
unsigned int j=0; j<joint_list_.size(); j++)
290 feats[joint_list_[j].ex_index]++ ;
296 if (joint_list_[j].mismatch!=0)
297 feats[joint_list_[j].ex_index] +=
AA_helper(path_, joint_seq, joint_list_[j].index) ;
299 feats[joint_list_[j].ex_index] ++ ;
303 std::vector<int> idx ;
308 for (
unsigned int r=0; r<idx.size(); r++)
309 for (
unsigned int s=r; s<idx.size(); s++)
320 fprintf(stdout,
"\n") ;
326 std::string joint_seq ;
327 std::vector<struct joint_list_struct> joint_list ;
342 for (
int apos=0; apos+
degree-1<alen; apos++)
344 struct joint_list_struct list_item ;
345 list_item.ex_index = i ;
346 list_item.index = apos+joint_seq.size() ;
347 list_item.mismatch = 0 ;
349 joint_list.push_back(list_item) ;
351 joint_seq += std::string(avec, alen) ;
418 if (nr!=128 || nc!=128)
419 SG_ERROR(
"AA_matrix should be of shape 128x128\n");
435 if (
lhs!=NULL &&
rhs!=NULL)
447 "128*128 scalar product matrix");
455 "the kernel matrix with its length defined by the number of "
456 "vectors of the string features");
465 void CSpectrumMismatchRBFKernel::init()