SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
CanberraWordDistance.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) Christian Gehl
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
16 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
// Constructor fragment: base-initialises CStringDistance<uint16_t> with no
// arguments and emits a debug message through SG_DEBUG.
// NOTE(review): the declarator line that must precede this initializer list
// is absent from this listing (the embedded numbering jumps from 19 to 21);
// restore it from the upstream file before compiling.
21 : CStringDistance<uint16_t>()
22 {
23  SG_DEBUG("CCanberraWordDistance created")
24 }
25 
// Constructor fragment: base-initialises CStringDistance<uint16_t>, logs a
// debug message, then forwards l and r to init().
// NOTE(review): the declarator lines that declare the parameters l and r are
// absent from this listing (the embedded numbering jumps from 25 to 28);
// their types cannot be determined from here - restore from the upstream file.
28 : CStringDistance<uint16_t>()
29 {
30  SG_DEBUG("CCanberraWordDistance created")
31 
32  init(l, r);
33 }
34 
// Body that only calls cleanup().
// NOTE(review): the declarator line is absent from this listing (the embedded
// numbering jumps from 34 to 36); presumably this is the destructor - confirm
// against the upstream file.
36 {
37  cleanup();
38 }
39 
40 bool CCanberraWordDistance::init(CFeatures* l, CFeatures* r)
41 {
43 }
44 
// Empty function body: nothing is done here.
// NOTE(review): the declarator line is absent from this listing (the embedded
// numbering jumps from 44 to 46); presumably this is the cleanup() called at
// embedded line 37 - confirm against the upstream file.
46 {
47 }
48 
49 float64_t CCanberraWordDistance::compute(int32_t idx_a, int32_t idx_b)
50 {
51  int32_t alen, blen;
52  bool free_avec, free_bvec;
53 
54  uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
55  get_feature_vector(idx_a, alen, free_avec);
56  uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
57  get_feature_vector(idx_b, blen, free_bvec);
58 
59  float64_t result=0;
60 
61  int32_t left_idx=0;
62  int32_t right_idx=0;
63 
64  while (left_idx < alen && right_idx < blen)
65  {
66  uint16_t sym=avec[left_idx];
67  if (avec[left_idx]==bvec[right_idx])
68  {
69  int32_t old_left_idx=left_idx;
70  int32_t old_right_idx=right_idx;
71 
72  while (left_idx< alen && avec[left_idx]==sym)
73  left_idx++;
74 
75  while (right_idx< blen && bvec[right_idx]==sym)
76  right_idx++;
77 
78  result +=
80  ((left_idx-old_left_idx)-(right_idx-old_right_idx)))/
81  ((float64_t)
82  ((left_idx-old_left_idx) + (right_idx-old_right_idx)));
83  }
84  else if (avec[left_idx]<bvec[right_idx])
85  {
86  result++;
87 
88  while (left_idx< alen && avec[left_idx]==sym)
89  left_idx++;
90  }
91  else
92  {
93  sym=bvec[right_idx];
94  result++;
95 
96  while (right_idx< blen && bvec[right_idx]==sym)
97  right_idx++;
98  }
99  }
100 
101  while (left_idx < alen)
102  {
103  uint16_t sym=avec[left_idx];
104  result++;
105 
106  while (left_idx< alen && avec[left_idx]==sym)
107  left_idx++;
108  }
109 
110  while (right_idx < blen)
111  {
112  uint16_t sym=bvec[right_idx];
113  result++;
114 
115  while (right_idx< blen && bvec[right_idx]==sym)
116  right_idx++;
117  }
119  free_feature_vector(avec, idx_a, free_avec);
121  free_feature_vector(bvec, idx_b, free_bvec);
122 
123  return result;
124 }
virtual bool init(CFeatures *l, CFeatures *r)
double float64_t
Definition: common.h:50
#define SG_DEBUG(...)
Definition: SGIO.h:107
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:381
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:383
float64_t compute(int32_t idx_a, int32_t idx_b)
static T abs(T a)
Definition: Math.h:179
template class StringDistance

SHOGUN Machine Learning Toolbox - Documentation