ExplicitSpecFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/features/ExplicitSpecFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013 
00014 using namespace shogun;
00015 
00016 CExplicitSpecFeatures::CExplicitSpecFeatures(void) :CDotFeatures()
00017 {
00018     SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures(void)",
00019                 "\n");
00020 
00021     use_normalization = false;
00022     num_strings = 0;
00023     alphabet_size = 0;
00024 
00025     spec_size = 0;
00026     k_spectrum = NULL;
00027 }
00028 
00029 
00030 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00031 {
00032     ASSERT(str);
00033 
00034     use_normalization=normalize;
00035     num_strings = str->get_num_vectors();
00036     spec_size = str->get_num_symbols();
00037 
00038     obtain_kmer_spectrum(str);
00039 
00040     SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00041 }
00042 
00043 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig), 
00044     num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00045 {
00046     k_spectrum= SG_MALLOC(float64_t*, num_strings);
00047     for (int32_t i=0; i<num_strings; i++)
00048         k_spectrum[i]=CMath::clone_vector(k_spectrum[i], spec_size);
00049 }
00050 
00051 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00052 {
00053     delete_kmer_spectrum();
00054 }
00055 
00056 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00057 {
00058     ASSERT(df);
00059     ASSERT(df->get_feature_type() == get_feature_type());
00060     ASSERT(df->get_feature_class() == get_feature_class());
00061     CExplicitSpecFeatures* sf = (CExplicitSpecFeatures*) df;
00062 
00063     ASSERT(vec_idx1 < num_strings);
00064     ASSERT(vec_idx2 < sf->num_strings);
00065     float64_t* vec1=k_spectrum[vec_idx1];
00066     float64_t* vec2=sf->k_spectrum[vec_idx2];
00067 
00068     return CMath::dot(vec1, vec2, spec_size);
00069 }
00070 
00071 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00072 {
00073     ASSERT(vec2_len == spec_size);
00074     ASSERT(vec_idx1 < num_strings);
00075     float64_t* vec1=k_spectrum[vec_idx1];
00076     float64_t result=0;
00077     
00078     for (int32_t i=0; i<spec_size; i++)
00079         result+=vec1[i]*vec2[i];
00080 
00081     return result;
00082 }
00083 
00084 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00085 {
00086     ASSERT(vec2_len == spec_size);
00087     ASSERT(vec_idx1 < num_strings);
00088     float64_t* vec1=k_spectrum[vec_idx1];
00089 
00090     if (abs_val)
00091     {
00092         for (int32_t i=0; i<spec_size; i++)
00093             vec2[i]+=alpha*CMath::abs(vec1[i]);
00094     }
00095     else
00096     {
00097         for (int32_t i=0; i<spec_size; i++)
00098             vec2[i]+=alpha*vec1[i];
00099     }
00100 }
00101 
00102 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00103 {
00104     k_spectrum= SG_MALLOC(float64_t*, num_strings);
00105 
00106     for (int32_t i=0; i<num_strings; i++)
00107     {
00108         k_spectrum[i]=SG_MALLOC(float64_t, spec_size);
00109         memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00110 
00111         int32_t len=0;
00112         bool free_fv;
00113         uint16_t* fv=str->get_feature_vector(i, len, free_fv);
00114 
00115         for (int32_t j=0; j<len; j++)
00116             k_spectrum[i][fv[j]]++;
00117 
00118         str->free_feature_vector(fv, i, free_fv);
00119 
00120         if (use_normalization)
00121         {
00122             float64_t n=0;
00123             for (int32_t j=0; j<spec_size; j++)
00124                 n+=CMath::sq(k_spectrum[i][j]);
00125 
00126             n=CMath::sqrt(n);
00127 
00128             for (int32_t j=0; j<spec_size; j++)
00129                 k_spectrum[i][j]/=n;
00130         }
00131     }
00132 }
00133 
00134 void CExplicitSpecFeatures::delete_kmer_spectrum()
00135 {
00136     for (int32_t i=0; i<num_strings; i++)
00137         SG_FREE(k_spectrum[i]);
00138 
00139     SG_FREE(k_spectrum);
00140     k_spectrum=NULL;
00141 }
00142 
00143 CFeatures* CExplicitSpecFeatures::duplicate() const
00144 {
00145     return new CExplicitSpecFeatures(*this);
00146 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation