SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
ManhattanWordDistance.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Christian Gehl
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
16 #include <shogun/io/SGIO.h>
17 
18 using namespace shogun;
19 
21 : CStringDistance<uint16_t>()
22 {
23  SG_DEBUG("CManhattanWordDistance created")
24 }
25 
28 : CStringDistance<uint16_t>()
29 {
30  SG_DEBUG("CManhattanWordDistance created")
31 
32  init(l, r);
33 }
34 
36 {
37  cleanup();
38 }
39 
40 bool CManhattanWordDistance::init(CFeatures* l, CFeatures* r)
41 {
42  bool result=CStringDistance<uint16_t>::init(l,r);
43  return result;
44 }
45 
47 {
48 }
49 
50 float64_t CManhattanWordDistance::compute(int32_t idx_a, int32_t idx_b)
51 {
52  int32_t alen, blen;
53  bool free_avec, free_bvec;
54 
55  uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
56  get_feature_vector(idx_a, alen, free_avec);
57  uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
58  get_feature_vector(idx_b, blen, free_bvec);
59 
60  int32_t result=0;
61 
62  int32_t left_idx=0;
63  int32_t right_idx=0;
64 
65  while (left_idx < alen && right_idx < blen)
66  {
67  uint16_t sym=avec[left_idx];
68  if (avec[left_idx]==bvec[right_idx])
69  {
70  int32_t old_left_idx=left_idx;
71  int32_t old_right_idx=right_idx;
72 
73  while (left_idx< alen && avec[left_idx]==sym)
74  left_idx++;
75 
76  while (right_idx< blen && bvec[right_idx]==sym)
77  right_idx++;
78 
79  result += CMath::abs( (left_idx-old_left_idx) - (right_idx-old_right_idx) );
80  }
81  else if (avec[left_idx]<bvec[right_idx])
82  {
83 
84  while (left_idx< alen && avec[left_idx]==sym)
85  {
86  result++;
87  left_idx++;
88  }
89  }
90  else
91  {
92  sym=bvec[right_idx];
93 
94  while (right_idx< blen && bvec[right_idx]==sym)
95  {
96  result++;
97  right_idx++;
98  }
99  }
100  }
101 
102  result+=blen-right_idx + alen-left_idx;
103 
105  free_feature_vector(avec, idx_a, free_avec);
107  free_feature_vector(bvec, idx_b, free_bvec);
108 
109  return result;
110 }
111 
virtual bool init(CFeatures *l, CFeatures *r)
float64_t compute(int32_t idx_a, int32_t idx_b)
double float64_t
Definition: common.h:50
#define SG_DEBUG(...)
Definition: SGIO.h:107
All classes and functions are contained in the shogun namespace.
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:343
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:345
static T abs(T a)
Definition: Math.h:179
template class StringDistance

SHOGUN Machine Learning Toolbox - Documentation