ExplicitSpecFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2009 Soeren Sonnenburg
00008  * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/features/ExplicitSpecFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013 
00014 using namespace shogun;
00015 
00016 CExplicitSpecFeatures::CExplicitSpecFeatures() :CDotFeatures()
00017 {
00018     SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()",
00019                 "\n");
00020 
00021     use_normalization = false;
00022     num_strings = 0;
00023     alphabet_size = 0;
00024 
00025     spec_size = 0;
00026     k_spectrum = NULL;
00027 }
00028 
00029 
00030 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00031 {
00032     ASSERT(str);
00033 
00034     use_normalization=normalize;
00035     num_strings = str->get_num_vectors();
00036     spec_size = str->get_num_symbols();
00037 
00038     obtain_kmer_spectrum(str);
00039 
00040     SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00041 }
00042 
00043 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
00044     num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00045 {
00046     k_spectrum= SG_MALLOC(float64_t*, num_strings);
00047     for (int32_t i=0; i<num_strings; i++)
00048         k_spectrum[i]=SGVector<float64_t>::clone_vector(k_spectrum[i], spec_size);
00049 }
00050 
00051 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00052 {
00053     delete_kmer_spectrum();
00054 }
00055 
00056 int32_t CExplicitSpecFeatures::get_dim_feature_space() const
00057 {
00058     return spec_size;
00059 }
00060 
00061 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00062 {
00063     ASSERT(df);
00064     ASSERT(df->get_feature_type() == get_feature_type());
00065     ASSERT(df->get_feature_class() == get_feature_class());
00066     CExplicitSpecFeatures* sf = (CExplicitSpecFeatures*) df;
00067 
00068     ASSERT(vec_idx1 < num_strings);
00069     ASSERT(vec_idx2 < sf->num_strings);
00070     float64_t* vec1=k_spectrum[vec_idx1];
00071     float64_t* vec2=sf->k_spectrum[vec_idx2];
00072 
00073     return SGVector<float64_t>::dot(vec1, vec2, spec_size);
00074 }
00075 
00076 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00077 {
00078     ASSERT(vec2_len == spec_size);
00079     ASSERT(vec_idx1 < num_strings);
00080     float64_t* vec1=k_spectrum[vec_idx1];
00081     float64_t result=0;
00082 
00083     for (int32_t i=0; i<spec_size; i++)
00084         result+=vec1[i]*vec2[i];
00085 
00086     return result;
00087 }
00088 
00089 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00090 {
00091     ASSERT(vec2_len == spec_size);
00092     ASSERT(vec_idx1 < num_strings);
00093     float64_t* vec1=k_spectrum[vec_idx1];
00094 
00095     if (abs_val)
00096     {
00097         for (int32_t i=0; i<spec_size; i++)
00098             vec2[i]+=alpha*CMath::abs(vec1[i]);
00099     }
00100     else
00101     {
00102         for (int32_t i=0; i<spec_size; i++)
00103             vec2[i]+=alpha*vec1[i];
00104     }
00105 }
00106 
00107 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00108 {
00109     k_spectrum= SG_MALLOC(float64_t*, num_strings);
00110 
00111     for (int32_t i=0; i<num_strings; i++)
00112     {
00113         k_spectrum[i]=SG_MALLOC(float64_t, spec_size);
00114         memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00115 
00116         int32_t len=0;
00117         bool free_fv;
00118         uint16_t* fv=str->get_feature_vector(i, len, free_fv);
00119 
00120         for (int32_t j=0; j<len; j++)
00121             k_spectrum[i][fv[j]]++;
00122 
00123         str->free_feature_vector(fv, i, free_fv);
00124 
00125         if (use_normalization)
00126         {
00127             float64_t n=0;
00128             for (int32_t j=0; j<spec_size; j++)
00129                 n+=CMath::sq(k_spectrum[i][j]);
00130 
00131             n=CMath::sqrt(n);
00132 
00133             for (int32_t j=0; j<spec_size; j++)
00134                 k_spectrum[i][j]/=n;
00135         }
00136     }
00137 }
00138 
00139 void CExplicitSpecFeatures::delete_kmer_spectrum()
00140 {
00141     for (int32_t i=0; i<num_strings; i++)
00142         SG_FREE(k_spectrum[i]);
00143 
00144     SG_FREE(k_spectrum);
00145     k_spectrum=NULL;
00146 }
00147 
00148 CFeatures* CExplicitSpecFeatures::duplicate() const
00149 {
00150     return new CExplicitSpecFeatures(*this);
00151 }
00152 
00153 
00154 
00155 void* CExplicitSpecFeatures::get_feature_iterator(int32_t vector_index)
00156 {
00157     SG_NOTIMPLEMENTED;
00158     return NULL;
00159 }
00160 
00161 bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00162 {
00163     SG_NOTIMPLEMENTED;
00164     return NULL;
00165 }
00166 
00167 void CExplicitSpecFeatures::free_feature_iterator(void* iterator)
00168 {
00169     SG_NOTIMPLEMENTED;
00170 }
00171 
00172 int32_t CExplicitSpecFeatures::get_nnz_features_for_vector(int32_t num)
00173 {
00174     SG_NOTIMPLEMENTED;
00175     return 0;
00176 }
00177 
00178 EFeatureType CExplicitSpecFeatures::get_feature_type() const
00179 {
00180     return F_UNKNOWN;
00181 }
00182 
00183 EFeatureClass CExplicitSpecFeatures::get_feature_class() const
00184 {
00185     return C_SPEC;
00186 }
00187 
00188 int32_t CExplicitSpecFeatures::get_num_vectors() const
00189 {
00190     return num_strings;
00191 }
00192 
00193 int32_t CExplicitSpecFeatures::get_size() const
00194 {
00195     return sizeof(float64_t);
00196 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation