CommUlongStringKernel.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include "lib/common.h"
00012 #include "kernel/CommUlongStringKernel.h"
00013 #include "kernel/SqrtDiagKernelNormalizer.h"
00014 #include "features/StringFeatures.h"
00015 #include "lib/io.h"
00016 
00017 using namespace shogun;
00018 
00019 CCommUlongStringKernel::CCommUlongStringKernel(int32_t size, bool us)
00020 : CStringKernel<uint64_t>(size), use_sign(us)
00021 {
00022     properties |= KP_LINADD;
00023     clear_normal();
00024 
00025     set_normalizer(new CSqrtDiagKernelNormalizer());
00026 }
00027 
00028 CCommUlongStringKernel::CCommUlongStringKernel(
00029     CStringFeatures<uint64_t>* l, CStringFeatures<uint64_t>* r, bool us,
00030     int32_t size)
00031 : CStringKernel<uint64_t>(size), use_sign(us)
00032 {
00033     properties |= KP_LINADD;
00034     clear_normal();
00035     set_normalizer(new CSqrtDiagKernelNormalizer());
00036     init(l,r);
00037 }
00038 
00039 CCommUlongStringKernel::~CCommUlongStringKernel()
00040 {
00041     cleanup();
00042 }
00043 
00044 void CCommUlongStringKernel::remove_lhs()
00045 {
00046     delete_optimization();
00047 
00048 #ifdef SVMLIGHT
00049     if (lhs)
00050         cache_reset();
00051 #endif
00052 
00053     lhs = NULL ; 
00054     rhs = NULL ; 
00055 }
00056 
00057 void CCommUlongStringKernel::remove_rhs()
00058 {
00059 #ifdef SVMLIGHT
00060     if (rhs)
00061         cache_reset();
00062 #endif
00063 
00064     rhs = lhs;
00065 }
00066 
00067 bool CCommUlongStringKernel::init(CFeatures* l, CFeatures* r)
00068 {
00069     CStringKernel<uint64_t>::init(l,r);
00070     return init_normalizer();
00071 }
00072 
00073 void CCommUlongStringKernel::cleanup()
00074 {
00075     delete_optimization();
00076     clear_normal();
00077     CKernel::cleanup();
00078 }
00079 
00080 float64_t CCommUlongStringKernel::compute(int32_t idx_a, int32_t idx_b)
00081 {
00082     int32_t alen, blen;
00083     bool free_avec, free_bvec;
00084     uint64_t* avec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(idx_a, alen, free_avec);
00085     uint64_t* bvec=((CStringFeatures<uint64_t>*) rhs)->get_feature_vector(idx_b, blen, free_bvec);
00086 
00087     float64_t result=0;
00088 
00089     int32_t left_idx=0;
00090     int32_t right_idx=0;
00091 
00092     if (use_sign)
00093     {
00094         while (left_idx < alen && right_idx < blen)
00095         {
00096             if (avec[left_idx]==bvec[right_idx])
00097             {
00098                 uint64_t sym=avec[left_idx];
00099 
00100                 while (left_idx< alen && avec[left_idx]==sym)
00101                     left_idx++;
00102 
00103                 while (right_idx< blen && bvec[right_idx]==sym)
00104                     right_idx++;
00105 
00106                 result++;
00107             }
00108             else if (avec[left_idx]<bvec[right_idx])
00109                 left_idx++;
00110             else
00111                 right_idx++;
00112         }
00113     }
00114     else
00115     {
00116         while (left_idx < alen && right_idx < blen)
00117         {
00118             if (avec[left_idx]==bvec[right_idx])
00119             {
00120                 int32_t old_left_idx=left_idx;
00121                 int32_t old_right_idx=right_idx;
00122 
00123                 uint64_t sym=avec[left_idx];
00124 
00125                 while (left_idx< alen && avec[left_idx]==sym)
00126                     left_idx++;
00127 
00128                 while (right_idx< blen && bvec[right_idx]==sym)
00129                     right_idx++;
00130 
00131                 result+=((float64_t) (left_idx-old_left_idx)) * ((float64_t) (right_idx-old_right_idx));
00132             }
00133             else if (avec[left_idx]<bvec[right_idx])
00134                 left_idx++;
00135             else
00136                 right_idx++;
00137         }
00138     }
00139     ((CStringFeatures<uint64_t>*) lhs)->free_feature_vector(avec, idx_a, free_avec);
00140     ((CStringFeatures<uint64_t>*) rhs)->free_feature_vector(bvec, idx_b, free_bvec);
00141 
00142     return result;
00143 }
00144 
00145 void CCommUlongStringKernel::add_to_normal(int32_t vec_idx, float64_t weight)
00146 {
00147     int32_t t=0;
00148     int32_t j=0;
00149     int32_t k=0;
00150     int32_t last_j=0;
00151     int32_t len=-1;
00152     bool free_vec;
00153     uint64_t* vec=((CStringFeatures<uint64_t>*) lhs)->get_feature_vector(vec_idx, len, free_vec);
00154 
00155     if (vec && len>0)
00156     {
00157         //use malloc not new [] as DynamicArray uses it
00158         uint64_t* dic= (uint64_t*) malloc(
00159             (len+dictionary.get_num_elements())*sizeof(uint64_t));
00160         float64_t* dic_weights= (float64_t*) malloc(
00161             (len+dictionary.get_num_elements())*sizeof(float64_t));
00162 
00163         if (use_sign)
00164         {
00165             for (j=1; j<len; j++)
00166             {
00167                 if (vec[j]==vec[j-1])
00168                     continue;
00169 
00170                 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
00171             }
00172 
00173             merge_dictionaries(t, j, k, vec, dic, dic_weights, weight, vec_idx);
00174 
00175             while (k<dictionary.get_num_elements())
00176             {
00177                 dic[t]=dictionary[k];
00178                 dic_weights[t]=dictionary_weights[k];
00179                 t++;
00180                 k++;
00181             }
00182         }
00183         else
00184         {
00185             for (j=1; j<len; j++)
00186             {
00187                 if (vec[j]==vec[j-1])
00188                     continue;
00189 
00190                 merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
00191                 last_j = j;
00192             }
00193 
00194             merge_dictionaries(t, j, k, vec, dic, dic_weights, weight*(j-last_j), vec_idx);
00195 
00196             while (k<dictionary.get_num_elements())
00197             {
00198                 dic[t]=dictionary[k];
00199                 dic_weights[t]=dictionary_weights[k];
00200                 t++;
00201                 k++;
00202             }
00203         }
00204 
00205         dictionary.set_array(dic, t, len+dictionary.get_num_elements());
00206         dictionary_weights.set_array(dic_weights, t, len+dictionary.get_num_elements());
00207     }
00208     ((CStringFeatures<uint64_t>*) lhs)->free_feature_vector(vec, vec_idx, free_vec);
00209 
00210     set_is_initialized(true);
00211 }
00212 
00213 void CCommUlongStringKernel::clear_normal()
00214 {
00215     dictionary.resize_array(0);
00216     dictionary_weights.resize_array(0);
00217     set_is_initialized(false);
00218 }
00219 
00220 bool CCommUlongStringKernel::init_optimization(
00221     int32_t count, int32_t *IDX, float64_t * weights)
00222 {
00223     clear_normal();
00224 
00225     if (count<=0)
00226     {
00227         set_is_initialized(true);
00228         SG_DEBUG( "empty set of SVs\n");
00229         return true;
00230     }
00231 
00232     SG_DEBUG( "initializing CCommUlongStringKernel optimization\n");
00233 
00234     for (int32_t i=0; i<count; i++)
00235     {
00236         if ( (i % (count/10+1)) == 0)
00237             SG_PROGRESS(i, 0, count);
00238 
00239         add_to_normal(IDX[i], weights[i]);
00240     }
00241 
00242     SG_PRINT( "Done.         \n");
00243     
00244     set_is_initialized(true);
00245     return true;
00246 }
00247 
00248 bool CCommUlongStringKernel::delete_optimization() 
00249 {
00250     SG_DEBUG( "deleting CCommUlongStringKernel optimization\n");
00251     clear_normal();
00252     return true;
00253 }
00254 
00255 // binary search for each feature. trick: as features are sorted save last found idx in old_idx and
00256 // only search in the remainder of the dictionary
00257 float64_t CCommUlongStringKernel::compute_optimized(int32_t i)
00258 { 
00259     float64_t result = 0;
00260     int32_t j, last_j=0;
00261     int32_t old_idx = 0;
00262 
00263     if (!get_is_initialized())
00264     {
00265       SG_ERROR( "CCommUlongStringKernel optimization not initialized\n");
00266         return 0 ; 
00267     }
00268 
00269 
00270 
00271     int32_t alen = -1;
00272     bool free_avec;
00273     uint64_t* avec=((CStringFeatures<uint64_t>*) rhs)->
00274         get_feature_vector(i, alen, free_avec);
00275 
00276     if (avec && alen>0)
00277     {
00278         if (use_sign)
00279         {
00280             for (j=1; j<alen; j++)
00281             {
00282                 if (avec[j]==avec[j-1])
00283                     continue;
00284 
00285                 int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
00286 
00287                 if (idx!=-1)
00288                 {
00289                     if (dictionary[idx+old_idx] == avec[j-1])
00290                         result += dictionary_weights[idx+old_idx];
00291 
00292                     old_idx+=idx;
00293                 }
00294             }
00295 
00296             int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
00297             if (idx!=-1)
00298                 result += dictionary_weights[idx+old_idx];
00299         }
00300         else
00301         {
00302             for (j=1; j<alen; j++)
00303             {
00304                 if (avec[j]==avec[j-1])
00305                     continue;
00306 
00307                 int32_t idx = CMath::binary_search_max_lower_equal(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[j-1]);
00308 
00309                 if (idx!=-1)
00310                 {
00311                     if (dictionary[idx+old_idx] == avec[j-1])
00312                         result += dictionary_weights[idx+old_idx]*(j-last_j);
00313 
00314                     old_idx+=idx;
00315                 }
00316 
00317                 last_j = j;
00318             }
00319 
00320             int32_t idx = CMath::binary_search(&(dictionary.get_array()[old_idx]), dictionary.get_num_elements()-old_idx, avec[alen-1]);
00321             if (idx!=-1)
00322                 result += dictionary_weights[idx+old_idx]*(alen-last_j);
00323         }
00324     }
00325 
00326     ((CStringFeatures<uint64_t>*) rhs)->free_feature_vector(avec, i, free_avec);
00327 
00328     return normalizer->normalize_rhs(result, i);
00329 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation