CanberraWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) Christian Gehl
00008  * Written (W) 1999-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/lib/common.h>
00013 #include <shogun/distance/CanberraWordDistance.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016 #include <shogun/io/SGIO.h>
00017 
00018 using namespace shogun;
00019 
00020 CCanberraWordDistance::CCanberraWordDistance()
00021 : CStringDistance<uint16_t>()
00022 {
00023     SG_DEBUG("CCanberraWordDistance created");
00024 }
00025 
00026 CCanberraWordDistance::CCanberraWordDistance(
00027     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00028 : CStringDistance<uint16_t>()
00029 {
00030     SG_DEBUG("CCanberraWordDistance created");
00031 
00032     init(l, r);
00033 }
00034 
00035 CCanberraWordDistance::~CCanberraWordDistance()
00036 {
00037     cleanup();
00038 }
00039 
00040 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00041 {
00042     return CStringDistance<uint16_t>::init(l,r);
00043 }
00044 
00045 void CCanberraWordDistance::cleanup()
00046 {
00047 }
00048 
00049 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00050 {
00051     int32_t alen, blen;
00052     bool free_avec, free_bvec;
00053 
00054     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00055         get_feature_vector(idx_a, alen, free_avec);
00056     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00057         get_feature_vector(idx_b, blen, free_bvec);
00058 
00059     float64_t result=0;
00060 
00061     int32_t left_idx=0;
00062     int32_t right_idx=0;
00063 
00064     while (left_idx < alen && right_idx < blen)
00065     {
00066         uint16_t sym=avec[left_idx];
00067         if (avec[left_idx]==bvec[right_idx])
00068         {
00069             int32_t old_left_idx=left_idx;
00070             int32_t old_right_idx=right_idx;
00071 
00072             while (left_idx< alen && avec[left_idx]==sym)
00073                 left_idx++;
00074 
00075             while (right_idx< blen && bvec[right_idx]==sym)
00076                 right_idx++;
00077 
00078             result +=
00079                 CMath::abs((float64_t)
00080                     ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00081                 ((float64_t)
00082                     ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00083         }
00084         else if (avec[left_idx]<bvec[right_idx])
00085         {
00086             result++;
00087 
00088             while (left_idx< alen && avec[left_idx]==sym)
00089                 left_idx++;
00090         }
00091         else
00092         {
00093             sym=bvec[right_idx];
00094             result++;
00095 
00096             while (right_idx< blen && bvec[right_idx]==sym)
00097                 right_idx++;
00098         }
00099     }
00100     
00101     while (left_idx < alen)
00102     {
00103         uint16_t sym=avec[left_idx];
00104         result++;
00105 
00106         while (left_idx< alen && avec[left_idx]==sym)
00107             left_idx++;
00108     }
00109 
00110     while (right_idx < blen)
00111     {
00112         uint16_t sym=bvec[right_idx];
00113         result++;
00114 
00115         while (right_idx< blen && bvec[right_idx]==sym)
00116             right_idx++;
00117     }
00118     ((CStringFeatures<uint16_t>*) lhs)->
00119         free_feature_vector(avec, idx_a, free_avec);
00120     ((CStringFeatures<uint16_t>*) rhs)->
00121         free_feature_vector(bvec, idx_b, free_bvec);
00122 
00123     return result;
00124 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation