00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include <shogun/mathematics/Math.h>
00013 #include <shogun/features/streaming/StreamingDenseFeatures.h>
00014 #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
00015
00016 namespace shogun
00017 {
00018 template<class T>
00019 CStreamingDenseFeatures<T>::CStreamingDenseFeatures() :
00020 CStreamingDotFeatures()
00021 {
00022 set_read_functions();
00023 init();
00024 parser.set_free_vector_after_release(false);
00025 }
00026
00027 template<class T>
00028 CStreamingDenseFeatures<T>::CStreamingDenseFeatures(CStreamingFile* file,
00029 bool is_labelled, int32_t size) :
00030 CStreamingDotFeatures()
00031 {
00032 init(file, is_labelled, size);
00033 set_read_functions();
00034 parser.set_free_vector_after_release(false);
00035 }
00036
00037 template<class T> CStreamingDenseFeatures<T>::CStreamingDenseFeatures(
00038 CDenseFeatures<T>* dense_features, float64_t* lab) :
00039 CStreamingDotFeatures()
00040 {
00041 REQUIRE(dense_features, "%s::CStreamingDenseFeatures(): Features needed!\n");
00042
00043 CStreamingFileFromDenseFeatures<T>* file;
00044 bool is_labelled;
00045 int32_t size=1024;
00046
00047 is_labelled=lab;
00048 file=new CStreamingFileFromDenseFeatures<T>(dense_features, lab);
00049 init(file, is_labelled, size);
00050 set_read_functions();
00051 parser.set_free_vector_after_release(false);
00052 parser.set_free_vectors_on_destruct(false);
00053 seekable=true;
00054 }
00055
00056 template<class T> CStreamingDenseFeatures<T>::~CStreamingDenseFeatures()
00057 {
00058 SG_DEBUG("entering %s::~CStreamingDenseFeatures()\n", get_name());
00059 SG_DEBUG("leaving %s::~CStreamingDenseFeatures()\n", get_name());
00060
00061 current_vector.vector=NULL;
00062 current_vector.vlen=0;
00063 }
00064
00065 template<class T> void CStreamingDenseFeatures<T>::reset_stream()
00066 {
00067 if (seekable)
00068 {
00069 ((CStreamingFileFromDenseFeatures<T>*)working_file)->reset_stream();
00070 parser.exit_parser();
00071 parser.init(working_file, has_labels, 1);
00072 parser.set_free_vector_after_release(false);
00073 parser.start_parser();
00074 }
00075 }
00076
00077 template<class T> float32_t CStreamingDenseFeatures<T>::dense_dot(
00078 const float32_t* vec2, int32_t vec2_len)
00079 {
00080 ASSERT(vec2_len==current_vector.vlen);
00081 float32_t result=0;
00082
00083 for (int32_t i=0; i<current_vector.vlen; i++)
00084 result+=current_vector[i]*vec2[i];
00085
00086 return result;
00087 }
00088
00089 template<class T> float64_t CStreamingDenseFeatures<T>::dense_dot(
00090 const float64_t* vec2, int32_t vec2_len)
00091 {
00092 ASSERT(vec2_len==current_vector.vlen);
00093 float64_t result=0;
00094
00095 for (int32_t i=0; i<current_vector.vlen; i++)
00096 result+=current_vector[i]*vec2[i];
00097
00098 return result;
00099 }
00100
00101 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00102 float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
00103 {
00104 ASSERT(vec2_len==current_vector.vlen);
00105
00106 if (abs_val)
00107 {
00108 for (int32_t i=0; i<current_vector.vlen; i++)
00109 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00110 }
00111 else
00112 {
00113 for (int32_t i=0; i<current_vector.vlen; i++)
00114 vec2[i]+=alpha*current_vector[i];
00115 }
00116 }
00117
00118 template<class T> void CStreamingDenseFeatures<T>::add_to_dense_vec(
00119 float64_t alpha, float64_t* vec2, int32_t vec2_len, bool abs_val)
00120 {
00121 ASSERT(vec2_len==current_vector.vlen);
00122
00123 if (abs_val)
00124 {
00125 for (int32_t i=0; i<current_vector.vlen; i++)
00126 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00127 }
00128 else
00129 {
00130 for (int32_t i=0; i<current_vector.vlen; i++)
00131 vec2[i]+=alpha*current_vector[i];
00132 }
00133 }
00134
00135 template<class T> int32_t CStreamingDenseFeatures<T>::get_nnz_features_for_vector()
00136 {
00137 return current_vector.vlen;
00138 }
00139
00140 template<class T> CFeatures* CStreamingDenseFeatures<T>::duplicate() const
00141 {
00142 return new CStreamingDenseFeatures<T>(*this);
00143 }
00144
00145 template<class T> int32_t CStreamingDenseFeatures<T>::get_num_vectors() const
00146 {
00147
00148 return 1;
00149
00150 }
00151
00152 template<class T> int32_t CStreamingDenseFeatures<T>::get_size() const
00153 {
00154 return sizeof(T);
00155 }
00156
00157 template<class T>
00158 void CStreamingDenseFeatures<T>::set_vector_reader()
00159 {
00160 parser.set_read_vector(&CStreamingFile::get_vector);
00161 }
00162
00163 template<class T>
00164 void CStreamingDenseFeatures<T>::set_vector_and_label_reader()
00165 {
00166 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00167 }
00168
00169 #define GET_FEATURE_TYPE(f_type, sg_type) \
00170 template<> EFeatureType CStreamingDenseFeatures<sg_type>::get_feature_type() const \
00171 { \
00172 return f_type; \
00173 }
00174
00175 GET_FEATURE_TYPE(F_BOOL, bool)
00176 GET_FEATURE_TYPE(F_CHAR, char)
00177 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00178 GET_FEATURE_TYPE(F_BYTE, int8_t)
00179 GET_FEATURE_TYPE(F_SHORT, int16_t)
00180 GET_FEATURE_TYPE(F_WORD, uint16_t)
00181 GET_FEATURE_TYPE(F_INT, int32_t)
00182 GET_FEATURE_TYPE(F_UINT, uint32_t)
00183 GET_FEATURE_TYPE(F_LONG, int64_t)
00184 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00185 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00186 GET_FEATURE_TYPE(F_DREAL, float64_t)
00187 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00188 #undef GET_FEATURE_TYPE
00189
00190 template<class T>
00191 void CStreamingDenseFeatures<T>::init()
00192 {
00193 working_file=NULL;
00194 current_vector.vector=NULL;
00195 seekable=false;
00196 current_vector.vlen=-1;
00197 }
00198
00199 template<class T>
00200 void CStreamingDenseFeatures<T>::init(CStreamingFile* file, bool is_labelled,
00201 int32_t size)
00202 {
00203 init();
00204 has_labels=is_labelled;
00205 working_file=file;
00206 SG_REF(working_file);
00207 parser.init(file, is_labelled, size);
00208 seekable=false;
00209 }
00210
00211 template<class T>
00212 void CStreamingDenseFeatures<T>::start_parser()
00213 {
00214 if (!parser.is_running())
00215 parser.start_parser();
00216 }
00217
00218 template<class T>
00219 void CStreamingDenseFeatures<T>::end_parser()
00220 {
00221 parser.end_parser();
00222 }
00223
00224 template<class T>
00225 bool CStreamingDenseFeatures<T>::get_next_example()
00226 {
00227 bool ret_value;
00228 ret_value=(bool)parser.get_next_example(current_vector.vector,
00229 current_vector.vlen, current_label);
00230
00231 return ret_value;
00232 }
00233
00234 template<class T>
00235 SGVector<T> CStreamingDenseFeatures<T>::get_vector()
00236 {
00237 return current_vector;
00238 }
00239
00240 template<class T>
00241 float64_t CStreamingDenseFeatures<T>::get_label()
00242 {
00243 ASSERT(has_labels);
00244
00245 return current_label;
00246 }
00247
00248 template<class T>
00249 void CStreamingDenseFeatures<T>::release_example()
00250 {
00251 parser.finalize_example();
00252 }
00253
00254 template<class T>
00255 int32_t CStreamingDenseFeatures<T>::get_dim_feature_space() const
00256 {
00257 return current_vector.vlen;
00258 }
00259
00260 template<class T>
00261 float32_t CStreamingDenseFeatures<T>::dot(CStreamingDotFeatures* df)
00262 {
00263 ASSERT(df);
00264 ASSERT(df->get_feature_type() == get_feature_type());
00265 ASSERT(df->get_feature_class() == get_feature_class());
00266 CStreamingDenseFeatures<T>* sf=(CStreamingDenseFeatures<T>*)df;
00267
00268 SGVector<T> other_vector=sf->get_vector();
00269
00270 return SGVector<T>::dot(current_vector.vector, other_vector.vector, current_vector.vlen);
00271 }
00272
00273 template<class T>
00274 float32_t CStreamingDenseFeatures<T>::dot(SGVector<T> sgvec1)
00275 {
00276 int32_t len1;
00277 len1=sgvec1.vlen;
00278
00279 if (len1!=current_vector.vlen)
00280 SG_ERROR(
00281 "Lengths %d and %d not equal while computing dot product!\n", len1, current_vector.vlen);
00282
00283 return SGVector<T>::dot(current_vector.vector, sgvec1.vector, len1);
00284 }
00285
00286 template<class T>
00287 int32_t CStreamingDenseFeatures<T>::get_num_features()
00288 {
00289 return current_vector.vlen;
00290 }
00291
00292 template<class T>
00293 EFeatureClass CStreamingDenseFeatures<T>::get_feature_class() const
00294 {
00295 return C_STREAMING_DENSE;
00296 }
00297
00298 template<class T>
00299 CFeatures* CStreamingDenseFeatures<T>::get_streamed_features(
00300 index_t num_elements)
00301 {
00302 SG_DEBUG("entering %s(%p)::get_streamed_features(%d)\n", get_name(), this,
00303 num_elements);
00304
00305
00306 SGMatrix<T> matrix;
00307
00308 for (index_t i=0; i<num_elements; ++i)
00309 {
00310
00311 if (!get_next_example())
00312 {
00313 SG_WARNING("%s::get_streamed_features(): ran out of streaming "
00314 "data, reallocating matrix and returning!\n", get_name());
00315
00316
00317 SGMatrix<T> so_far(matrix.num_rows, i);
00318
00319
00320 memcpy(so_far.matrix, matrix.matrix,
00321 so_far.num_rows*so_far.num_cols*sizeof(T));
00322
00323 matrix=so_far;
00324 break;
00325 }
00326 else
00327 {
00328
00329 if (!matrix.matrix)
00330 {
00331 SG_DEBUG("%s::get_streamed_features(): allocating %dx%d matrix\n",
00332 get_name(), current_vector.vlen, num_elements);
00333 matrix=SGMatrix<T>(current_vector.vlen, num_elements);
00334 }
00335
00336
00337 SGVector<T> vec=get_vector();
00338
00339
00340 if (vec.vlen!=matrix.num_rows)
00341 {
00342 SG_ERROR("%s::get_streamed_features(): streamed vectors have "
00343 "different dimensions. This is not allowed!\n",
00344 get_name());
00345 }
00346
00347
00348 memcpy(&matrix.matrix[current_vector.vlen*i], vec.vector,
00349 vec.vlen*sizeof(T));
00350
00351
00352 if (sg_io->get_loglevel()==MSG_DEBUG)
00353 {
00354 SG_DEBUG("%d. ", i);
00355 vec.display_vector("streamed vector");
00356 }
00357
00358
00359 release_example();
00360 }
00361
00362 }
00363
00364
00365 CDenseFeatures<T>* result=new CDenseFeatures<T>(matrix);
00366
00367 SG_DEBUG("leaving %s(%p)::get_streamed_features(%d) and returning %dx%d "
00368 "matrix\n", get_name(), this, num_elements, matrix.num_rows,
00369 matrix.num_cols);
00370
00371 return result;
00372 }
00373
00374 template class CStreamingDenseFeatures<bool> ;
00375 template class CStreamingDenseFeatures<char> ;
00376 template class CStreamingDenseFeatures<int8_t> ;
00377 template class CStreamingDenseFeatures<uint8_t> ;
00378 template class CStreamingDenseFeatures<int16_t> ;
00379 template class CStreamingDenseFeatures<uint16_t> ;
00380 template class CStreamingDenseFeatures<int32_t> ;
00381 template class CStreamingDenseFeatures<uint32_t> ;
00382 template class CStreamingDenseFeatures<int64_t> ;
00383 template class CStreamingDenseFeatures<uint64_t> ;
00384 template class CStreamingDenseFeatures<float32_t> ;
00385 template class CStreamingDenseFeatures<float64_t> ;
00386 template class CStreamingDenseFeatures<floatmax_t> ;
00387 }