Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/features/ExplicitSpecFeatures.h>
00012 #include <shogun/io/SGIO.h>
00013
00014 using namespace shogun;
00015
00016 CExplicitSpecFeatures::CExplicitSpecFeatures() :CDotFeatures()
00017 {
00018 SG_UNSTABLE("CExplicitSpecFeatures::CExplicitSpecFeatures()",
00019 "\n");
00020
00021 use_normalization = false;
00022 num_strings = 0;
00023 alphabet_size = 0;
00024
00025 spec_size = 0;
00026 k_spectrum = NULL;
00027 }
00028
00029
00030 CExplicitSpecFeatures::CExplicitSpecFeatures(CStringFeatures<uint16_t>* str, bool normalize) : CDotFeatures()
00031 {
00032 ASSERT(str);
00033
00034 use_normalization=normalize;
00035 num_strings = str->get_num_vectors();
00036 spec_size = str->get_num_symbols();
00037
00038 obtain_kmer_spectrum(str);
00039
00040 SG_DEBUG("SPEC size=%d, num_str=%d\n", spec_size, num_strings);
00041 }
00042
00043 CExplicitSpecFeatures::CExplicitSpecFeatures(const CExplicitSpecFeatures& orig) : CDotFeatures(orig),
00044 num_strings(orig.num_strings), alphabet_size(orig.alphabet_size), spec_size(orig.spec_size)
00045 {
00046 k_spectrum= SG_MALLOC(float64_t*, num_strings);
00047 for (int32_t i=0; i<num_strings; i++)
00048 k_spectrum[i]=SGVector<float64_t>::clone_vector(k_spectrum[i], spec_size);
00049 }
00050
00051 CExplicitSpecFeatures::~CExplicitSpecFeatures()
00052 {
00053 delete_kmer_spectrum();
00054 }
00055
00056 int32_t CExplicitSpecFeatures::get_dim_feature_space() const
00057 {
00058 return spec_size;
00059 }
00060
00061 float64_t CExplicitSpecFeatures::dot(int32_t vec_idx1, CDotFeatures* df, int32_t vec_idx2)
00062 {
00063 ASSERT(df);
00064 ASSERT(df->get_feature_type() == get_feature_type());
00065 ASSERT(df->get_feature_class() == get_feature_class());
00066 CExplicitSpecFeatures* sf = (CExplicitSpecFeatures*) df;
00067
00068 ASSERT(vec_idx1 < num_strings);
00069 ASSERT(vec_idx2 < sf->num_strings);
00070 float64_t* vec1=k_spectrum[vec_idx1];
00071 float64_t* vec2=sf->k_spectrum[vec_idx2];
00072
00073 return SGVector<float64_t>::dot(vec1, vec2, spec_size);
00074 }
00075
00076 float64_t CExplicitSpecFeatures::dense_dot(int32_t vec_idx1, const float64_t* vec2, int32_t vec2_len)
00077 {
00078 ASSERT(vec2_len == spec_size);
00079 ASSERT(vec_idx1 < num_strings);
00080 float64_t* vec1=k_spectrum[vec_idx1];
00081 float64_t result=0;
00082
00083 for (int32_t i=0; i<spec_size; i++)
00084 result+=vec1[i]*vec2[i];
00085
00086 return result;
00087 }
00088
00089 void CExplicitSpecFeatures::add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t* vec2, int32_t vec2_len, bool abs_val)
00090 {
00091 ASSERT(vec2_len == spec_size);
00092 ASSERT(vec_idx1 < num_strings);
00093 float64_t* vec1=k_spectrum[vec_idx1];
00094
00095 if (abs_val)
00096 {
00097 for (int32_t i=0; i<spec_size; i++)
00098 vec2[i]+=alpha*CMath::abs(vec1[i]);
00099 }
00100 else
00101 {
00102 for (int32_t i=0; i<spec_size; i++)
00103 vec2[i]+=alpha*vec1[i];
00104 }
00105 }
00106
00107 void CExplicitSpecFeatures::obtain_kmer_spectrum(CStringFeatures<uint16_t>* str)
00108 {
00109 k_spectrum= SG_MALLOC(float64_t*, num_strings);
00110
00111 for (int32_t i=0; i<num_strings; i++)
00112 {
00113 k_spectrum[i]=SG_MALLOC(float64_t, spec_size);
00114 memset(k_spectrum[i], 0, sizeof(float64_t)*spec_size);
00115
00116 int32_t len=0;
00117 bool free_fv;
00118 uint16_t* fv=str->get_feature_vector(i, len, free_fv);
00119
00120 for (int32_t j=0; j<len; j++)
00121 k_spectrum[i][fv[j]]++;
00122
00123 str->free_feature_vector(fv, i, free_fv);
00124
00125 if (use_normalization)
00126 {
00127 float64_t n=0;
00128 for (int32_t j=0; j<spec_size; j++)
00129 n+=CMath::sq(k_spectrum[i][j]);
00130
00131 n=CMath::sqrt(n);
00132
00133 for (int32_t j=0; j<spec_size; j++)
00134 k_spectrum[i][j]/=n;
00135 }
00136 }
00137 }
00138
00139 void CExplicitSpecFeatures::delete_kmer_spectrum()
00140 {
00141 for (int32_t i=0; i<num_strings; i++)
00142 SG_FREE(k_spectrum[i]);
00143
00144 SG_FREE(k_spectrum);
00145 k_spectrum=NULL;
00146 }
00147
00148 CFeatures* CExplicitSpecFeatures::duplicate() const
00149 {
00150 return new CExplicitSpecFeatures(*this);
00151 }
00152
00153
00154
00155 void* CExplicitSpecFeatures::get_feature_iterator(int32_t vector_index)
00156 {
00157 SG_NOTIMPLEMENTED;
00158 return NULL;
00159 }
00160
00161 bool CExplicitSpecFeatures::get_next_feature(int32_t& index, float64_t& value, void* iterator)
00162 {
00163 SG_NOTIMPLEMENTED;
00164 return NULL;
00165 }
00166
00167 void CExplicitSpecFeatures::free_feature_iterator(void* iterator)
00168 {
00169 SG_NOTIMPLEMENTED;
00170 }
00171
00172 int32_t CExplicitSpecFeatures::get_nnz_features_for_vector(int32_t num)
00173 {
00174 SG_NOTIMPLEMENTED;
00175 return 0;
00176 }
00177
00178 EFeatureType CExplicitSpecFeatures::get_feature_type() const
00179 {
00180 return F_UNKNOWN;
00181 }
00182
00183 EFeatureClass CExplicitSpecFeatures::get_feature_class() const
00184 {
00185 return C_SPEC;
00186 }
00187
00188 int32_t CExplicitSpecFeatures::get_num_vectors() const
00189 {
00190 return num_strings;
00191 }
00192
00193 int32_t CExplicitSpecFeatures::get_size() const
00194 {
00195 return sizeof(float64_t);
00196 }