00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/lib/common.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/kernel/SimpleLocalityImprovedStringKernel.h>
00014 #include <shogun/features/Features.h>
00015 #include <shogun/features/StringFeatures.h>
00016
00017 using namespace shogun;
00018
00019 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel()
00020 : CStringKernel<char>()
00021 {
00022 init();
00023 }
00024
00025 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00026 int32_t size, int32_t l, int32_t id, int32_t od)
00027 : CStringKernel<char>(size)
00028 {
00029 init();
00030
00031 length=l;
00032 inner_degree=id;
00033 outer_degree=od;
00034 }
00035
00036 CSimpleLocalityImprovedStringKernel::CSimpleLocalityImprovedStringKernel(
00037 CStringFeatures<char>* l, CStringFeatures<char>* r,
00038 int32_t len, int32_t id, int32_t od)
00039 : CStringKernel<char>()
00040 {
00041 init();
00042
00043 length=len;
00044 inner_degree=id;
00045 outer_degree=od;
00046
00047 init(l, r);
00048 }
00049
00050 CSimpleLocalityImprovedStringKernel::~CSimpleLocalityImprovedStringKernel()
00051 {
00052 cleanup();
00053 }
00054
00055 bool CSimpleLocalityImprovedStringKernel::init(CFeatures* l, CFeatures* r)
00056 {
00057 bool result = CStringKernel<char>::init(l,r);
00058
00059 if (!result)
00060 return false;
00061 const int32_t num_features = ((CStringFeatures<char>*) l)->get_max_vector_length();
00062 const int32_t PYRAL = 2 * length - 1;
00063 const int32_t pyra_len = num_features-PYRAL+1;
00064 const int32_t pyra_len2 = (int32_t) pyra_len/2;
00065
00066 SG_FREE(pyramid_weights);
00067
00068 pyramid_weights = SG_MALLOC(float64_t, pyra_len);
00069 num_pyramid_weights=pyra_len;
00070
00071 SG_INFO("initializing pyramid weights: size=%ld length=%i\n",
00072 num_features, length);
00073
00074 float64_t PYRAL_pot;
00075 int32_t DEGREE1_1 = (inner_degree & 0x1)==0;
00076 int32_t DEGREE1_1n = (inner_degree & ~0x1)!=0;
00077 int32_t DEGREE1_2 = (inner_degree & 0x2)!=0;
00078 int32_t DEGREE1_3 = (inner_degree & ~0x3)!=0;
00079 int32_t DEGREE1_4 = (inner_degree & 0x4)!=0;
00080 {
00081 float64_t PYRAL_ = PYRAL;
00082 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00083 if (DEGREE1_1n)
00084 {
00085 PYRAL_ *= PYRAL_;
00086 if (DEGREE1_2)
00087 PYRAL_pot *= PYRAL_;
00088 if (DEGREE1_3)
00089 {
00090 PYRAL_ *= PYRAL_;
00091 if (DEGREE1_4)
00092 PYRAL_pot *= PYRAL_;
00093 }
00094 }
00095 }
00096
00097 {
00098 int32_t j;
00099 for (j = 0; j < pyra_len; j++)
00100 pyramid_weights[j] = 4*((float64_t)((j < pyra_len2)? j+1 : pyra_len-j))/((float64_t)pyra_len);
00101 for (j = 0; j < pyra_len; j++)
00102 pyramid_weights[j] /= PYRAL_pot;
00103 }
00104
00105 return init_normalizer();
00106 }
00107
00108 void CSimpleLocalityImprovedStringKernel::cleanup()
00109 {
00110 SG_FREE(pyramid_weights);
00111 pyramid_weights = NULL;
00112 num_pyramid_weights = 0;
00113
00114 CKernel::cleanup();
00115 }
00116
00117 float64_t CSimpleLocalityImprovedStringKernel::dot_pyr (const char* const x1,
00118 const char* const x2, const int32_t NOF_NTS, const int32_t NTWIDTH,
00119 const int32_t DEGREE1, const int32_t DEGREE2, float64_t *pyra)
00120 {
00121 const int32_t PYRAL = 2*NTWIDTH-1;
00122 int32_t pyra_len, pyra_len2;
00123 float64_t pot, PYRAL_pot;
00124 float64_t sum;
00125 int32_t DEGREE1_1 = (DEGREE1 & 0x1)==0;
00126 int32_t DEGREE1_1n = (DEGREE1 & ~0x1)!=0;
00127 int32_t DEGREE1_2 = (DEGREE1 & 0x2)!=0;
00128 int32_t DEGREE1_3 = (DEGREE1 & ~0x3)!=0;
00129 int32_t DEGREE1_4 = (DEGREE1 & 0x4)!=0;
00130 {
00131 float64_t PYRAL_ = PYRAL;
00132 PYRAL_pot = DEGREE1_1 ? 1.0 : PYRAL_;
00133 if (DEGREE1_1n)
00134 {
00135 PYRAL_ *= PYRAL_;
00136 if (DEGREE1_2) PYRAL_pot *= PYRAL_;
00137 if (DEGREE1_3)
00138 {
00139 PYRAL_ *= PYRAL_;
00140 if (DEGREE1_4) PYRAL_pot *= PYRAL_;
00141 }
00142 }
00143 }
00144
00145 ASSERT((DEGREE1 & ~0x7) == 0);
00146 ASSERT((DEGREE2 & ~0x7) == 0);
00147
00148 pyra_len = NOF_NTS-PYRAL+1;
00149 pyra_len2 = (int32_t) pyra_len/2;
00150 {
00151 int32_t j;
00152 for (j = 0; j < pyra_len; j++)
00153 pyra[j] = 4*((float64_t)((j < pyra_len2) ? j+1 : pyra_len-j))/((float64_t)pyra_len);
00154 for (j = 0; j < pyra_len; j++)
00155 pyra[j] /= PYRAL_pot;
00156 }
00157
00158 register int32_t conv;
00159 register int32_t i;
00160 register int32_t j;
00161
00162 sum = 0.0;
00163 conv = 0;
00164 for (j = 0; j < PYRAL; j++)
00165 conv += (x1[j] == x2[j]) ? 1 : 0;
00166
00167 for (i = 0; i < NOF_NTS-PYRAL+1; i++)
00168 {
00169 register float64_t pot2;
00170 if (i>0)
00171 conv += ((x1[i+PYRAL-1] == x2[i+PYRAL-1]) ? 1 : 0 ) -
00172 ((x1[i-1] == x2[i-1]) ? 1 : 0);
00173 {
00174 register float64_t conv2 = conv;
00175 pot2 = (DEGREE1_1) ? 1.0 : conv2;
00176 if (DEGREE1_1n)
00177 {
00178 conv2 *= conv2;
00179 if (DEGREE1_2)
00180 pot2 *= conv2;
00181 if (DEGREE1_3 && DEGREE1_4)
00182 pot2 *= conv2*conv2;
00183 }
00184 }
00185 sum += pot2*pyra[i];
00186 }
00187
00188 pot = ((DEGREE2 & 0x1) == 0) ? 1.0 : sum;
00189 if ((DEGREE2 & ~0x1) != 0)
00190 {
00191 sum *= sum;
00192 if ((DEGREE2 & 0x2) != 0)
00193 pot *= sum;
00194 if ((DEGREE2 & ~0x3) != 0)
00195 {
00196 sum *= sum;
00197 if ((DEGREE2 & 0x4) != 0)
00198 pot *= sum;
00199 }
00200 }
00201 return pot;
00202 }
00203
00204 float64_t CSimpleLocalityImprovedStringKernel::compute(
00205 int32_t idx_a, int32_t idx_b)
00206 {
00207 int32_t alen, blen;
00208 bool free_avec, free_bvec;
00209
00210 char* avec = ((CStringFeatures<char>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00211 char* bvec = ((CStringFeatures<char>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00212
00213
00214 ASSERT(alen==blen);
00215
00216 float64_t dpt;
00217
00218 dpt = dot_pyr(avec, bvec, alen, length, inner_degree, outer_degree, pyramid_weights);
00219 dpt = dpt / pow((float64_t) alen, (float64_t) outer_degree);
00220
00221 ((CStringFeatures<char>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00222 ((CStringFeatures<char>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00223 return (float64_t) dpt;
00224 }
00225
00226 void CSimpleLocalityImprovedStringKernel::init()
00227 {
00228 length = 3;
00229 inner_degree = 3;
00230 outer_degree = 1;
00231 pyramid_weights=NULL;
00232 num_pyramid_weights=0;
00233
00234 m_parameters->add(&length, "length", "Window Length.");
00235 m_parameters->add(&inner_degree, "inner_degree", "Inner degree.");
00236 m_parameters->add(&outer_degree, "outer_degree", "Outer degree.");
00237
00238 m_parameters->add_vector(&pyramid_weights, &num_pyramid_weights,
00239 "pyramid_weights", "Pyramid weights.");
00240 }