OligoStringKernel.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2008 Christian Igel, Tobias Glasmachers
00008  * Copyright (C) 2008 Christian Igel, Tobias Glasmachers
00009  *
00010  * Shogun adjustments (W) 2008-2009 Soeren Sonnenburg
00011  * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00012  */
00013 #ifndef _OLIGOSTRINGKERNEL_H_
00014 #define _OLIGOSTRINGKERNEL_H_
00015 
00016 #include "kernel/StringKernel.h"
00017 
00018 #include <vector>
00019 #include <string>
00020 
00021 namespace shogun
00022 {
00041 class COligoStringKernel : public CStringKernel<char>
00042 {
00043     public:
00045         COligoStringKernel(void);
00046 
00052         COligoStringKernel(int32_t cache_size, int32_t k, float64_t width);
00053 
00055         virtual ~COligoStringKernel();
00056 
00063         virtual bool init(CFeatures* l, CFeatures* r);
00064 
00069         virtual EKernelType get_kernel_type() { return K_OLIGO; }
00070 
00075         virtual const char* get_name() const { return "OligoStringKernel"; }
00076 
00077 
00078         virtual float64_t compute(int32_t x, int32_t y);
00079 
00082         virtual void cleanup();
00083 
00084     protected:
00098         static void encodeOligo(
00099             const std::string& sequence, uint32_t k_mer_length,
00100             const std::string& allowed_characters,
00101             std::vector< std::pair<int32_t, float64_t> >&   values);
00102 
00110         static void getSequences(
00111             const std::vector<std::string>& sequences,
00112             uint32_t k_mer_length, const std::string& allowed_characters,
00113             std::vector< std::vector< std::pair<int32_t, float64_t> > >& encoded_sequences);
00114 
00130         float64_t kernelOligoFast(
00131             const std::vector< std::pair<int32_t, float64_t> >& x,
00132             const std::vector< std::pair<int32_t, float64_t> >& y,
00133             int32_t max_distance = -1);
00134 
00135     private: 
00146         void getExpFunctionCache(uint32_t sequence_length);
00147 
00148         static inline bool cmpOligos_(std::pair<int32_t, float64_t> a,
00149                 std::pair<int32_t, float64_t> b )
00150         {
00151             return (a.second < b.second);
00152         }
00153 
00154         void init();
00155 
00156     protected:
00158         int32_t k;
00160         float64_t width;
00162         float64_t* gauss_table;
00164         int32_t gauss_table_len;
00165 };
00166 }
00167 #endif // _OLIGOSTRINGKERNEL_H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation