SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
HammingWordDistance.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2007-2009 Christian Gehl
8  * Written (W) 1999-2009 Soeren Sonnenburg
9  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
10  */
11 
12 #include <shogun/lib/common.h>
13 #include <shogun/io/SGIO.h>
14 
15 #include <shogun/base/Parameter.h>
16 
20 
21 using namespace shogun;
22 
24 {
25  init();
26 }
27 
29 : CStringDistance<uint16_t>()
30 {
31  init();
32  use_sign=sign;
33 
34  SG_DEBUG("CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0)
35 }
36 
39 : CStringDistance<uint16_t>()
40 {
41  init();
42  use_sign=sign;
43 
44  SG_DEBUG("CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0)
45 
46  init(l, r);
47 }
48 
50 {
51  cleanup();
52 }
53 
54 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
55 {
56  bool result=CStringDistance<uint16_t>::init(l,r);
57  return result;
58 }
59 
61 {
62 }
63 
64 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b)
65 {
66  int32_t alen, blen;
67  bool free_avec, free_bvec;
68 
69  uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
70  get_feature_vector(idx_a, alen, free_avec);
71  uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
72  get_feature_vector(idx_b, blen, free_bvec);
73 
74  int32_t result=0;
75 
76  int32_t left_idx=0;
77  int32_t right_idx=0;
78 
79  if (use_sign)
80  {
81  // hamming of: if words appear in both vectors
82  while (left_idx < alen && right_idx < blen)
83  {
84  uint16_t sym=avec[left_idx];
85  if (avec[left_idx]==bvec[right_idx])
86  {
87  while (left_idx< alen && avec[left_idx]==sym)
88  left_idx++;
89 
90  while (right_idx< blen && bvec[right_idx]==sym)
91  right_idx++;
92  }
93  else if (avec[left_idx]<bvec[right_idx])
94  {
95  result++;
96 
97  while (left_idx< alen && avec[left_idx]==sym)
98  left_idx++;
99  }
100  else
101  {
102  sym=bvec[right_idx];
103  result++;
104 
105  while (right_idx< blen && bvec[right_idx]==sym)
106  right_idx++;
107  }
108  }
109  }
110  else
111  {
112  //hamming of: if words appear in both vectors _the same number_ of times
113  while (left_idx < alen && right_idx < blen)
114  {
115  uint16_t sym=avec[left_idx];
116  if (avec[left_idx]==bvec[right_idx])
117  {
118  int32_t old_left_idx=left_idx;
119  int32_t old_right_idx=right_idx;
120 
121  while (left_idx< alen && avec[left_idx]==sym)
122  left_idx++;
123 
124  while (right_idx< blen && bvec[right_idx]==sym)
125  right_idx++;
126 
127  if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
128  result++;
129  }
130  else if (avec[left_idx]<bvec[right_idx])
131  {
132  result++;
133 
134  while (left_idx< alen && avec[left_idx]==sym)
135  left_idx++;
136  }
137  else
138  {
139  sym=bvec[right_idx];
140  result++;
141 
142  while (right_idx< blen && bvec[right_idx]==sym)
143  right_idx++;
144  }
145  }
146  }
147 
148  while (left_idx < alen)
149  {
150  uint16_t sym=avec[left_idx];
151  result++;
152 
153  while (left_idx< alen && avec[left_idx]==sym)
154  left_idx++;
155  }
156 
157  while (right_idx < blen)
158  {
159  uint16_t sym=bvec[right_idx];
160  result++;
161 
162  while (right_idx< blen && bvec[right_idx]==sym)
163  right_idx++;
164  }
165 
167  free_feature_vector(avec, idx_a, free_avec);
169  free_feature_vector(bvec, idx_b, free_bvec);
170 
171  return result;
172 }
173 
174 void CHammingWordDistance::init()
175 {
176  use_sign = false;
177  m_parameters->add(&use_sign, "use_sign",
178  "If signum(counts) is used instead of counts.");
179 }
Parameter * m_parameters
Definition: SGObject.h:546
void add(bool *param, const char *name, const char *description="")
Definition: Parameter.cpp:37
double float64_t
Definition: common.h:50
float64_t compute(int32_t idx_a, int32_t idx_b)
#define SG_DEBUG(...)
Definition: SGIO.h:107
all classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CFeatures * lhs
feature vectors to occur on the left hand side
Definition: Distance.h:381
The class Features is the base class of all feature objects.
Definition: Features.h:68
CFeatures * rhs
feature vectors to occur on the right hand side
Definition: Distance.h:383
virtual bool init(CFeatures *l, CFeatures *r)
template class StringDistance

SHOGUN Machine Learning Toolbox - Documentation