HammingWordDistance.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2007-2009 Christian Gehl
00008  * Written (W) 1999-2009 Soeren Sonnenburg
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "lib/common.h"
00013 #include "lib/io.h"
00014 
00015 #include "base/Parameter.h"
00016 
00017 #include "distance/HammingWordDistance.h"
00018 #include "features/Features.h"
00019 #include "features/StringFeatures.h"
00020 
00021 using namespace shogun;
00022 
00023 CHammingWordDistance::CHammingWordDistance()
00024 {
00025     init();
00026 }
00027 
00028 CHammingWordDistance::CHammingWordDistance(bool sign)
00029 : CStringDistance<uint16_t>()
00030 {
00031     init();
00032     use_sign=sign;
00033 
00034     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00035 }
00036 
00037 CHammingWordDistance::CHammingWordDistance(
00038     CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r, bool sign)
00039 : CStringDistance<uint16_t>()
00040 {
00041     init();
00042     use_sign=sign;
00043 
00044     SG_DEBUG( "CHammingWordDistance with sign: %d created\n", (sign) ? 1 : 0);
00045 
00046     init(l, r);
00047 }
00048 
00049 CHammingWordDistance::~CHammingWordDistance()
00050 {
00051     cleanup();
00052 }
00053   
00054 bool CHammingWordDistance::init(CFeatures* l, CFeatures* r)
00055 {
00056     bool result=CStringDistance<uint16_t>::init(l,r);
00057     return result;
00058 }
00059 
00060 void CHammingWordDistance::cleanup()
00061 {
00062 }
00063 
00064 float64_t CHammingWordDistance::compute(int32_t idx_a, int32_t idx_b)
00065 {
00066     int32_t alen, blen;
00067     bool free_avec, free_bvec;
00068 
00069     uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)->
00070         get_feature_vector(idx_a, alen, free_avec);
00071     uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)->
00072         get_feature_vector(idx_b, blen, free_bvec);
00073 
00074     int32_t result=0;
00075 
00076     int32_t left_idx=0;
00077     int32_t right_idx=0;
00078 
00079     if (use_sign)
00080     {
00081         // hamming of: if words appear in both vectors 
00082         while (left_idx < alen && right_idx < blen)
00083         {
00084             uint16_t sym=avec[left_idx];
00085             if (avec[left_idx]==bvec[right_idx])
00086             {
00087                 while (left_idx< alen && avec[left_idx]==sym)
00088                     left_idx++;
00089 
00090                 while (right_idx< blen && bvec[right_idx]==sym)
00091                     right_idx++;
00092             }
00093             else if (avec[left_idx]<bvec[right_idx])
00094             {
00095                 result++;
00096 
00097                 while (left_idx< alen && avec[left_idx]==sym)
00098                     left_idx++;
00099             }
00100             else
00101             {
00102                 sym=bvec[right_idx];
00103                 result++;
00104 
00105                 while (right_idx< blen && bvec[right_idx]==sym)
00106                     right_idx++;
00107             }
00108         }
00109     }
00110     else
00111     {
00112         //hamming of: if words appear in both vectors _the same number_ of times
00113         while (left_idx < alen && right_idx < blen)
00114         {
00115             uint16_t sym=avec[left_idx];
00116             if (avec[left_idx]==bvec[right_idx])
00117             {
00118                 int32_t old_left_idx=left_idx;
00119                 int32_t old_right_idx=right_idx;
00120 
00121                 while (left_idx< alen && avec[left_idx]==sym)
00122                     left_idx++;
00123 
00124                 while (right_idx< blen && bvec[right_idx]==sym)
00125                     right_idx++;
00126 
00127                 if ((left_idx-old_left_idx)!=(right_idx-old_right_idx))
00128                     result++;
00129             }
00130             else if (avec[left_idx]<bvec[right_idx])
00131             {
00132                 result++;
00133 
00134                 while (left_idx< alen && avec[left_idx]==sym)
00135                     left_idx++;
00136             }
00137             else
00138             {
00139                 sym=bvec[right_idx];
00140                 result++;
00141 
00142                 while (right_idx< blen && bvec[right_idx]==sym)
00143                     right_idx++;
00144             }
00145         }
00146     }
00147 
00148     while (left_idx < alen)
00149     {
00150         uint16_t sym=avec[left_idx];
00151         result++;
00152 
00153         while (left_idx< alen && avec[left_idx]==sym)
00154             left_idx++;
00155     }
00156 
00157     while (right_idx < blen)
00158     {
00159         uint16_t sym=bvec[right_idx];
00160         result++;
00161 
00162         while (right_idx< blen && bvec[right_idx]==sym)
00163             right_idx++;
00164     }
00165 
00166     ((CStringFeatures<uint16_t>*) lhs)->
00167         free_feature_vector(avec, idx_a, free_avec);
00168     ((CStringFeatures<uint16_t>*) rhs)->
00169         free_feature_vector(bvec, idx_b, free_bvec);
00170 
00171     return result;
00172 }
00173 
00174 void CHammingWordDistance::init()
00175 {
00176     use_sign = false;
00177     m_parameters->add(&use_sign, "use_sign",
00178             "If signum(counts) is used instead of counts.");
00179 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation