00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/lib/common.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/kernel/string/SimpleLocalityImprovedStringKernel.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016
00017 using namespace shogun;
00018
00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel()
00020 : CStringKernel<char>()
00021 {
00022 SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00023 init();
00024 }
00025
00026 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00027 int32_t size, int32_t l, int32_t id, int32_t od)
00028 : CStringKernel<char>(size)
00029 {
00030 SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00031 init();
00032
00033 length=l;
00034 inner_degree=id;
00035 outer_degree=od;
00036 }
00037
00038 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00039 CStringFeatures<char>* l, CStringFeatures<char>* r,
00040 int32_t len, int32_t id, int32_t od)
00041 : CStringKernel<char>()
00042 {
00043 SG_UNSTABLE("SimpleLocalityImprovedStringKernel");
00044 init();
00045
00046 length=len;
00047 inner_degree=id;
00048 outer_degree=od;
00049
00050 init(l, r);
00051 }
00052
00053 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00054 {
00055 cleanup();
00056 }
00057
00058 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00059 {
00060 bool result = CStringKernel<char>::init(l,r);
00061
00062 if (!result)
00063 return false;
00064 const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00065 const int32_t PYRAL = 2 * length - 1;
00066 const int32_t pyra_len = num_features-PYRAL+1;
00067 const int32_t pyra_len2 = (int32_t) pyra_len/2;
00068
00069 SG_FREE(pyramid_weights);
00070
00071 pyramid_weights = SG_MALLOC(float64_t, pyra_len);
00072 num_pyramid_weights=pyra_len;
00073
00074 SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00075 num_features, length);
00076
00077 float64_t PYRAL_pot;
00078 int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
00079 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00080 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
00081 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
00082 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
00083 {
00084 float64_t PYRAL_ = PYRAL;
00085 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00086 if (DEGREE1_1n)
00087 {
00088 PYRAL_ *= PYRAL_;
00089 if (DEGREE1_2)
00090 PYRAL_pot *= PYRAL_;
00091 if (DEGREE1_3)
00092 {
00093 PYRAL_ *= PYRAL_;
00094 if (DEGREE1_4)
00095 PYRAL_pot *= PYRAL_;
00096 }
00097 }
00098 }
00099
00100 {
00101 int32_t j;
00102 for (j = 0; j < pyra_len; j++)
00103 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00104 for (j = 0; j < pyra_len; j++)
00105 pyramid_weights[j] /= PYRAL_pot;
00106 }
00107
00108 return init_normalizer();
00109 }
00110
00111 void CSimpleLocalityImprovedStringKernel::cleanup()
00112 {
00113 SG_FREE(pyramid_weights);
00114 pyramid_weights = NULL;
00115 num_pyramid_weights = 0;
00116
00117 CKernel::cleanup();
00118 }
00119
00120 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00121 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00122 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00123 {
00124 const int32_t PYRAL = 2*NTWIDTH-1;
00125 int32_t pyra_len, pyra_len2;
00126 float64_t pot, PYRAL_pot;
00127 float64_t sum;
00128 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00129 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00130 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00131 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00132 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00133 {
00134 float64_t PYRAL_ = PYRAL;
00135 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00136 if (DEGREE1_1n)
00137 {
00138 PYRAL_ *= PYRAL_;
00139 if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00140 if (DEGREE1_3)
00141 {
00142 PYRAL_ *= PYRAL_;
00143 if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00144 }
00145 }
00146 }
00147
00148 ASSERT((DEGREE1 & ~0x7) == 0);
00149 ASSERT((DEGREE2 & ~0x7) == 0);
00150
00151 pyra_len = NOF_NTS-PYRAL+1;
00152 pyra_len2 = (int32_t) pyra_len/2;
00153 {
00154 int32_t j;
00155 for (j = 0; j < pyra_len; j++)
00156 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00157 for (j = 0; j < pyra_len; j++)
00158 pyra[j] /= PYRAL_pot;
00159 }
00160
00161 register int32_t conv;
00162 register int32_t i;
00163 register int32_t j;
00164
00165 sum = 0.0;
00166 conv = 0;
00167 for (j = 0; j < PYRAL; j++)
00168 conv += (x1[j] == x2[j]) ? 1 : 0;
00169
00170 for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00171 {
00172 register float64_t pot2;
00173 if (i>0)
00174 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00175 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00176 {
00177 register float64_t conv2 = conv;
00178 pot2 = (DEGREE1_1) ? 1.0 : conv2;
00179 if (DEGREE1_1n)
00180 {
00181 conv2 *= conv2;
00182 if (DEGREE1_2)
00183 pot2 *= conv2;
00184 if (DEGREE1_3 && DEGREE1_4)
00185 pot2 *= conv2*conv2;
00186 }
00187 }
00188 sum += pot2*pyra[i];
00189 }
00190
00191 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00192 if ((DEGREE2 & ~0x1) != 0)
00193 {
00194 sum *= sum;
00195 if ((DEGREE2 & 0x2) != 0)
00196 pot *= sum;
00197 if ((DEGREE2 & ~0x3) != 0)
00198 {
00199 sum *= sum;
00200 if ((DEGREE2 & 0x4) != 0)
00201 pot *= sum;
00202 }
00203 }
00204 return pot;
00205 }
00206
00207 float64_t CSimpleLocalityImprovedStringKernel::compute(
00208 int32_t idx_a, int32_t idx_b)
00209 {
00210 int32_t alen, blen;
00211 bool free_avec, free_bvec;
00212
00213 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00214 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00215
00216
00217 ASSERT(alen==blen);
00218
00219 float64_t dpt;
00220
00221 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00222 dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
00223
00224 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00225 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00226 return (float64_t) dpt;
00227 }
00228
00229 void CSimpleLocalityImprovedStringKernel::init()
00230 {
00231 length = 3;
00232 inner_degree = 3;
00233 outer_degree = 1;
00234 pyramid_weights=NULL;
00235 num_pyramid_weights=0;
00236
00237 SG_ADD(&length, "length", "Window Length.", MS_AVAILABLE);
00238 SG_ADD(&inner_degree, "inner_degree", "Inner degree.", MS_AVAILABLE);
00239 SG_ADD(&outer_degree, "outer_degree", "Outer degree.", MS_AVAILABLE);
00240
00241 m_parameters->add_vector(&pyramid_weights, &num_pyramid_weights,
00242 "pyramid_weights", "Pyramid weights.");
00243 }