StreamingDenseFeatures.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2011 Shashwat Lal Das
00008  * Written (W) 2012 Heiko Strathmann
00009  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
00010  */
00011 
00012 #include <shogun/mathematics/Math.h>
00013 #include <shogun/features/streaming/StreamingDenseFeatures.h>
00014 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
00015 
00016 namespace shogun
00017 {
00018 template<class T>
00019 CStreamingDenseFeatures<T>::CStreamingDenseFeatures() :
00020         CStreamingDotFeatures()
00021 {
00022     set_read_functions();
00023     init();
00024     parser.set_free_vector_after_release(false);
00025 }
00026 
00027 template<class T>
00028 CStreamingDenseFeatures<T>::CStreamingDenseFeatures(CStreamingFile* file,
00029         bool is_labelled, int32_t size) :
00030         CStreamingDotFeatures()
00031 {
00032     init(file, is_labelled, size);
00033     set_read_functions();
00034     parser.set_free_vector_after_release(false);
00035 }
00036 
00037 template<class T> CStreamingDenseFeatures<T>::CStreamingDenseFeatures(
00038         CDenseFeatures<T>* dense_features, float64_t* lab) :
00039         CStreamingDotFeatures()
00040 {
00041     REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n");
00042 
00043     CStreamingFileFromDenseFeatures<T>* file;
00044     bool is_labelled;
00045     int32_t size=1024;
00046 
00047     is_labelled=lab;
00048     file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
00049     init(file, is_labelled, size);
00050     set_read_functions();
00051     parser.set_free_vector_after_release(false);
00052     parser.set_free_vectors_on_destruct(false);
00053     seekable=true;
00054 }
00055 
00056 template<class T> CStreamingDenseFeatures<T>::~CStreamingDenseFeatures()
00057 {
00058     SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name());
00059     SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name());
00060 
00061     current_vector.vector=NULL;
00062     current_vector.vlen=0;
00063 }
00064 
00065 template<class T> void CStreamingDenseFeatures<T>::reset_stream()
00066 {
00067     if (seekable)
00068     {
00069         ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
00070         parser.exit_parser();
00071         parser.init(working_file, has_labels, 1);
00072         parser.set_free_vector_after_release(false);
00073         parser.start_parser();
00074     }
00075 }
00076 
00077 template<class T> float32_t CStreamingDenseFeatures<T>::dense_dot(
00078         const float32_t* vec2, int32_t vec2_len)
00079 {
00080     ASSERT(vec2_len==current_vector.vlen);
00081     float32_t result=0;
00082 
00083     for (int32_t i=0; i<current_vector.vlen; i++)
00084         result+=current_vector[i]*vec2[i];
00085 
00086     return result;
00087 }
00088 
00089 template<class T> float64_t CStreamingDenseFeatures<T>::dense_dot(
00090         const float64_t* vec2, int32_t vec2_len)
00091 {
00092     ASSERT(vec2_len==current_vector.vlen);
00093     float64_t result=0;
00094 
00095     for (int32_t i=0; i<current_vector.vlen; i++)
00096         result+=current_vector[i]*vec2[i];
00097 
00098     return result;
00099 }
00100 
00101 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00102         float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
00103 {
00104     ASSERT(vec2_len==current_vector.vlen);
00105 
00106     if (abs_val)
00107     {
00108         for (int32_t i=0; i<current_vector.vlen; i++)
00109             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00110     }
00111     else
00112     {
00113         for (int32_t i=0; i<current_vector.vlen; i++)
00114             vec2[i]+=alpha*current_vector[i];
00115     }
00116 }
00117 
00118 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00119         float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
00120 {
00121     ASSERT(vec2_len==current_vector.vlen);
00122 
00123     if (abs_val)
00124     {
00125         for (int32_t i=0; i<current_vector.vlen; i++)
00126             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00127     }
00128     else
00129     {
00130         for (int32_t i=0; i<current_vector.vlen; i++)
00131             vec2[i]+=alpha*current_vector[i];
00132     }
00133 }
00134 
00135 template<class T> int32_t CStreamingDenseFeatures<T>::get_nnz_features_for_vector()
00136 {
00137     return current_vector.vlen;
00138 }
00139 
00140 template<class T> CFeatures* CStreamingDenseFeatures<T>::duplicate() const
00141 {
00142     return new CStreamingDenseFeatures<T>(*this);
00143 }
00144 
00145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
00146 {
00147 //  if (current_vector.vector)
00148         return 1;
00149 //  return 0;
00150 }
00151 
00152 template<class T> int32_t CStreamingDenseFeatures<T>::get_size() const
00153 {
00154     return sizeof(T);
00155 }
00156 
00157 template<class T>
00158 void CStreamingDenseFeatures<T>::set_vector_reader()
00159 {
00160     parser.set_read_vector(&CStreamingFile::get_vector);
00161 }
00162 
00163 template<class T>
00164 void CStreamingDenseFeatures<T>::set_vector_and_label_reader()
00165 {
00166     parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00167 }
00168 
00169 #define GET_FEATURE_TYPE(f_type, sg_type)               \
00170 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
00171 {                                   \
00172     return f_type;                          \
00173 }
00174 
00175 GET_FEATURE_TYPE(F_BOOL, bool)
00176 GET_FEATURE_TYPE(F_CHAR, char)
00177 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00178 GET_FEATURE_TYPE(F_BYTE, int8_t)
00179 GET_FEATURE_TYPE(F_SHORT, int16_t)
00180 GET_FEATURE_TYPE(F_WORD, uint16_t)
00181 GET_FEATURE_TYPE(F_INT, int32_t)
00182 GET_FEATURE_TYPE(F_UINT, uint32_t)
00183 GET_FEATURE_TYPE(F_LONG, int64_t)
00184 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00185 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00186 GET_FEATURE_TYPE(F_DREAL, float64_t)
00187 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00188 #undef GET_FEATURE_TYPE
00189 
00190 template<class T>
00191 void CStreamingDenseFeatures<T>::init()
00192 {
00193     working_file=NULL;
00194     current_vector.vector=NULL;
00195     seekable=false;
00196     current_vector.vlen=-1;
00197 }
00198 
00199 template<class T>
00200 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
00201         int32_t size)
00202 {
00203     init();
00204     has_labels=is_labelled;
00205     working_file=file;
00206     SG_REF(working_file);
00207     parser.init(file, is_labelled, size);
00208     seekable=false;
00209 }
00210 
00211 template<class T>
00212 void CStreamingDenseFeatures<T>::start_parser()
00213 {
00214     if (!parser.is_running())
00215         parser.start_parser();
00216 }
00217 
00218 template<class T>
00219 void CStreamingDenseFeatures<T>::end_parser()
00220 {
00221     parser.end_parser();
00222 }
00223 
00224 template<class T>
00225 bool CStreamingDenseFeatures<T>::get_next_example()
00226 {
00227     bool ret_value;
00228     ret_value=(bool)parser.get_next_example(current_vector.vector,
00229             current_vector.vlen, current_label);
00230 
00231     return ret_value;
00232 }
00233 
00234 template<class T>
00235 SGVector<T> CStreamingDenseFeatures<T>::get_vector()
00236 {
00237     return current_vector;
00238 }
00239 
00240 template<class T>
00241 float64_t CStreamingDenseFeatures<T>::get_label()
00242 {
00243     ASSERT(has_labels);
00244 
00245     return current_label;
00246 }
00247 
00248 template<class T>
00249 void CStreamingDenseFeatures<T>::release_example()
00250 {
00251     parser.finalize_example();
00252 }
00253 
00254 template<class T>
00255 int32_t CStreamingDenseFeatures<T>::get_dim_feature_space() const
00256 {
00257     return current_vector.vlen;
00258 }
00259 
00260 template<class T>
00261 float32_t CStreamingDenseFeatures<T>::dot(CStreamingDotFeatures* df)
00262 {
00263     ASSERT(df);
00264     ASSERT(df->get_feature_type() == get_feature_type());
00265     ASSERT(df->get_feature_class() == get_feature_class());
00266     CStreamingDenseFeatures<T>* sf=(CStreamingDenseFeatures<T>*)df;
00267 
00268     SGVector<T> other_vector=sf->get_vector();
00269 
00270     return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
00271 }
00272 
00273 template<class T>
00274 float32_t CStreamingDenseFeatures<T>::dot(SGVector<T> sgvec1)
00275 {
00276     int32_t len1;
00277     len1=sgvec1.vlen;
00278 
00279     if (len1!=current_vector.vlen)
00280         SG_ERROR(
00281                 "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
00282 
00283     return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1);
00284 }
00285 
00286 template<class T>
00287 int32_t CStreamingDenseFeatures<T>::get_num_features()
00288 {
00289     return current_vector.vlen;
00290 }
00291 
00292 template<class T>
00293 EFeatureClass CStreamingDenseFeatures<T>::get_feature_class() const
00294 {
00295     return C_STREAMING_DENSE;
00296 }
00297 
00298 template<class T>
00299 CFeatures* CStreamingDenseFeatures<T>::get_streamed_features(
00300         index_t num_elements)
00301 {
00302     SG_DEBUG("entering %s(%p)::get_streamed_features(%d)\n", get_name(), this,
00303             num_elements);
00304 
00305     /* init matrix empty since num_rows is not yet known */
00306     SGMatrix<T> matrix;
00307 
00308     for (index_t i=0; i<num_elements; ++i)
00309     {
00310         /* check if we run out of data */
00311         if (!get_next_example())
00312         {
00313             SG_WARNING("%s::get_streamed_features(): ran out of streaming "
00314                     "data, reallocating matrix and returning!\n", get_name());
00315 
00316             /* allocating space for data so far */
00317             SGMatrix<T> so_far(matrix.num_rows, i);
00318 
00319             /* copy */
00320             memcpy(so_far.matrix, matrix.matrix,
00321                     so_far.num_rows*so_far.num_cols*sizeof(T));
00322 
00323             matrix=so_far;
00324             break;
00325         }
00326         else
00327         {
00328             /* allocate matrix memory during first run */
00329             if (!matrix.matrix)
00330             {
00331                 SG_DEBUG("%s::get_streamed_features(): allocating %dx%d matrix\n",
00332                         get_name(), current_vector.vlen, num_elements);
00333                 matrix=SGMatrix<T>(current_vector.vlen, num_elements);
00334             }
00335 
00336             /* get an example from stream and copy to feature matrix */
00337             SGVector<T> vec=get_vector();
00338 
00339             /* check for inconsistent dimensions */
00340             if (vec.vlen!=matrix.num_rows)
00341             {
00342                 SG_ERROR("%s::get_streamed_features(): streamed vectors have "
00343                         "different dimensions. This is not allowed!\n",
00344                         get_name());
00345             }
00346 
00347             /* copy vector into matrix */
00348             memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
00349                     vec.vlen*sizeof(T));
00350 
00351             /* evtl output vector */
00352             if (sg_io->get_loglevel()==MSG_DEBUG)
00353             {
00354                 SG_DEBUG("%d. ", i);
00355                 vec.display_vector("streamed vector");
00356             }
00357 
00358             /* clean up */
00359             release_example();
00360         }
00361 
00362     }
00363 
00364     /* create new feature object from collected data */
00365     CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
00366 
00367     SG_DEBUG("leaving %s(%p)::get_streamed_features(%d) and returning %dx%d "
00368             "matrix\n", get_name(), this, num_elements, matrix.num_rows,
00369             matrix.num_cols);
00370 
00371     return result;
00372 }
00373 
00374 template class CStreamingDenseFeatures<bool> ;
00375 template class CStreamingDenseFeatures<char> ;
00376 template class CStreamingDenseFeatures<int8_t> ;
00377 template class CStreamingDenseFeatures<uint8_t> ;
00378 template class CStreamingDenseFeatures<int16_t> ;
00379 template class CStreamingDenseFeatures<uint16_t> ;
00380 template class CStreamingDenseFeatures<int32_t> ;
00381 template class CStreamingDenseFeatures<uint32_t> ;
00382 template class CStreamingDenseFeatures<int64_t> ;
00383 template class CStreamingDenseFeatures<uint64_t> ;
00384 template class CStreamingDenseFeatures<float32_t> ;
00385 template class CStreamingDenseFeatures<float64_t> ;
00386 template class CStreamingDenseFeatures<floatmax_t> ;
00387 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation