FixedDegreeStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/lib/common.h>
00012 #include <shogun/kernel/FixedDegreeStringKernel.h>
00013 #include <shogun/kernel/SqrtDiagKernelNormalizer.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016 #include <shogun/io/SGIO.h>
00017 
00018 using namespace shogun;
00019 
00020 void
00021 CFixedDegreeStringKernel::init()
00022 {
00023     m_parameters->add(&degree, "degree", "The degree.");
00024     set_normalizer(new CSqrtDiagKernelNormalizer());
00025 }
00026 
00027 CFixedDegreeStringKernel::CFixedDegreeStringKernel()
00028 : CStringKernel<char>(0), degree(0)
00029 {
00030     init();
00031 }
00032 
00033 CFixedDegreeStringKernel::CFixedDegreeStringKernel(int32_t size, int32_t d)
00034 : CStringKernel<char>(size), degree(d)
00035 {
00036     init();
00037 }
00038 
00039 CFixedDegreeStringKernel::CFixedDegreeStringKernel(
00040     CStringFeatures<char>* l, CStringFeatures<char>* r, int32_t d)
00041 : CStringKernel<char>(10), degree(d)
00042 {
00043     init();
00044     init(l, r);
00045 }
00046 
00047 CFixedDegreeStringKernel::~CFixedDegreeStringKernel()
00048 {
00049     cleanup();
00050 }
00051 
00052 bool CFixedDegreeStringKernel::init(CFeatures* l, CFeatures* r)
00053 {
00054     CStringKernel<char>::init(l, r);
00055     return init_normalizer();
00056 }
00057 
00058 void CFixedDegreeStringKernel::cleanup()
00059 {
00060     CKernel::cleanup();
00061 }
00062 
00063 float64_t CFixedDegreeStringKernel::compute(int32_t idx_a, int32_t idx_b)
00064 {
00065     int32_t alen, blen;
00066     bool free_avec, free_bvec;
00067 
00068     char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00069     char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00070 
00071     // can only deal with strings of same length
00072     ASSERT(alen==blen);
00073 
00074     int64_t sum = 0;
00075     for (int32_t i = 0; i<alen-degree+1; i++)
00076     {
00077         bool match = true;
00078 
00079         for (int32_t j = i; j<i+degree && match; j++)
00080             match = avec[j]==bvec[j];
00081         if (match)
00082             sum++;
00083     }
00084     ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00085     ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00086 
00087     return sum;
00088 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation