Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/lib/common.h>
00013 #include <shogun/distance/CanberraWordDistance.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016 #include <shogun/io/SGIO.h>
00017
00018 using namespace shogun;
00019
00020 CCanberraWordDistance::CCanberraWordDistance()
00021 : CStringDistance<uint16_t>()
00022 {
00023 SG_DEBUG("CCanberraWordDistance created");
00024 }
00025
00026 CCanberraWordDistance::CCanberraWordDistance(
00027 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r)
00028 : CStringDistance<uint16_t>()
00029 {
00030 SG_DEBUG("CCanberraWordDistance created");
00031
00032 init(l, r);
00033 }
00034
00035 CCanberraWordDistance::~CCanberraWordDistance()
00036 {
00037 cleanup();
00038 }
00039
00040 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
00041 {
00042 return CStringDistance<uint16_t>::init(l,r);
00043 }
00044
00045 void CCanberraWordDistance::cleanup()
00046 {
00047 }
00048
00049 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
00050 {
00051 int32_t alen, blen;
00052 bool free_avec, free_bvec;
00053
00054 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00055 get_feature_vector(idx_a, alen, free_avec);
00056 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00057 get_feature_vector(idx_b, blen, free_bvec);
00058
00059 float64_t result=0;
00060
00061 int32_t left_idx=0;
00062 int32_t right_idx=0;
00063
00064 while (left_idx < alen && right_idx < blen)
00065 {
00066 uint16_t sym=avec[left_idx];
00067 if (avec[left_idx]==bvec[right_idx])
00068 {
00069 int32_t old_left_idx=left_idx;
00070 int32_t old_right_idx=right_idx;
00071
00072 while (left_idx< alen && avec[left_idx]==sym)
00073 left_idx++;
00074
00075 while (right_idx< blen && bvec[right_idx]==sym)
00076 right_idx++;
00077
00078 result +=
00079 CMath::abs((float64_t)
00080 ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
00081 ((float64_t)
00082 ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
00083 }
00084 else if (avec[left_idx]<bvec[right_idx])
00085 {
00086 result++;
00087
00088 while (left_idx< alen && avec[left_idx]==sym)
00089 left_idx++;
00090 }
00091 else
00092 {
00093 sym=bvec[right_idx];
00094 result++;
00095
00096 while (right_idx< blen && bvec[right_idx]==sym)
00097 right_idx++;
00098 }
00099 }
00100
00101 while (left_idx < alen)
00102 {
00103 uint16_t sym=avec[left_idx];
00104 result++;
00105
00106 while (left_idx< alen && avec[left_idx]==sym)
00107 left_idx++;
00108 }
00109
00110 while (right_idx < blen)
00111 {
00112 uint16_t sym=bvec[right_idx];
00113 result++;
00114
00115 while (right_idx< blen && bvec[right_idx]==sym)
00116 right_idx++;
00117 }
00118 ((CStringFeatures<uint16_t>*) lhs)->
00119 free_feature_vector(avec, idx_a, free_avec);
00120 ((CStringFeatures<uint16_t>*) rhs)->
00121 free_feature_vector(bvec, idx_b, free_bvec);
00122
00123 return result;
00124 }