00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #ifndef _STREAMING_SIMPLEFEATURES__H__
00011 #define _STREAMING_SIMPLEFEATURES__H__
00012
00013 #include <shogun/lib/common.h>
00014 #include <shogun/mathematics/Math.h>
00015 #include <shogun/features/StreamingDotFeatures.h>
00016 #include <shogun/lib/DataType.h>
00017 #include <shogun/io/StreamingFileFromSimpleFeatures.h>
00018 #include <shogun/io/InputParser.h>
00019
00020 namespace shogun
00021 {
00027 template <class T> class CStreamingSimpleFeatures : public CStreamingDotFeatures
00028 {
00029 public:
00030
00038 CStreamingSimpleFeatures()
00039 : CStreamingDotFeatures()
00040 {
00041 set_read_functions();
00042 init();
00043 parser.set_free_vector_after_release(false);
00044 }
00045
00054 CStreamingSimpleFeatures(CStreamingFile* file,
00055 bool is_labelled,
00056 int32_t size)
00057 : CStreamingDotFeatures()
00058 {
00059 init(file, is_labelled, size);
00060 set_read_functions();
00061 parser.set_free_vector_after_release(false);
00062 }
00063
00071 CStreamingSimpleFeatures(CSimpleFeatures<T>* simple_features,
00072 float64_t* lab=NULL)
00073 : CStreamingDotFeatures()
00074 {
00075 CStreamingFileFromSimpleFeatures<T>* file;
00076 bool is_labelled;
00077 int32_t size = 1024;
00078
00079 if (lab)
00080 {
00081 is_labelled = true;
00082 file = new CStreamingFileFromSimpleFeatures<T>(simple_features, lab);
00083 }
00084 else
00085 {
00086 is_labelled = false;
00087 file = new CStreamingFileFromSimpleFeatures<T>(simple_features);
00088 }
00089
00090 SG_REF(file);
00091
00092 init(file, is_labelled, size);
00093 set_read_functions();
00094 parser.set_free_vector_after_release(false);
00095 parser.set_free_vectors_on_destruct(false);
00096 seekable=true;
00097 }
00098
00104 ~CStreamingSimpleFeatures()
00105 {
00106 parser.end_parser();
00107 }
00108
00118 virtual void set_vector_reader();
00119
00129 virtual void set_vector_and_label_reader();
00130
00136 virtual void start_parser();
00137
00143 virtual void end_parser();
00144
00149 virtual void reset_stream()
00150 {
00151 if (seekable)
00152 {
00153 ((CStreamingFileFromSimpleFeatures<T>*) working_file)->reset_stream();
00154 parser.exit_parser();
00155 parser.init(working_file, has_labels, 1);
00156 parser.set_free_vector_after_release(false);
00157 parser.start_parser();
00158 }
00159 }
00160
00169 virtual bool get_next_example();
00170
00176 SGVector<T> get_vector();
00177
00185 virtual float64_t get_label();
00186
00193 virtual void release_example();
00194
00202 virtual int32_t get_dim_feature_space() const;
00203
00211 virtual float32_t dot(SGVector<T> vec);
00212
00223 virtual float32_t dot(CStreamingDotFeatures *df);
00224
00232 virtual float32_t dense_dot(const float32_t* vec2, int32_t vec2_len)
00233 {
00234 ASSERT(vec2_len==current_length);
00235 float32_t result=0;
00236
00237 for (int32_t i=0; i<current_length; i++)
00238 result+=current_vector[i]*vec2[i];
00239
00240 return result;
00241 }
00242
00250 virtual float64_t dense_dot(const float64_t* vec2, int32_t vec2_len)
00251 {
00252 ASSERT(vec2_len==current_length);
00253 float64_t result=0;
00254
00255 for (int32_t i=0; i<current_length; i++)
00256 result+=current_vector[i]*vec2[i];
00257
00258 return result;
00259 }
00260
00270 virtual void add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len , bool abs_val=false)
00271 {
00272 ASSERT(vec2_len==current_length);
00273
00274 if (abs_val)
00275 {
00276 for (int32_t i=0; i<current_length; i++)
00277 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00278 }
00279 else
00280 {
00281 for (int32_t i=0; i<current_length; i++)
00282 vec2[i]+=alpha*current_vector[i];
00283 }
00284 }
00285
00295 virtual void add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len , bool abs_val=false)
00296 {
00297 ASSERT(vec2_len==current_length);
00298
00299 if (abs_val)
00300 {
00301 for (int32_t i=0; i<current_length; i++)
00302 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00303 }
00304 else
00305 {
00306 for (int32_t i=0; i<current_length; i++)
00307 vec2[i]+=alpha*current_vector[i];
00308 }
00309 }
00310
00315 virtual inline int32_t get_nnz_features_for_vector()
00316 {
00317 return current_length;
00318 }
00319
00325 int32_t get_num_features();
00326
00332 virtual inline EFeatureType get_feature_type();
00333
00339 virtual EFeatureClass get_feature_class();
00340
00346 virtual CFeatures* duplicate() const
00347 {
00348 return new CStreamingSimpleFeatures<T>(*this);
00349 }
00350
00356 inline virtual const char* get_name() const { return "StreamingSimpleFeatures"; }
00357
00363 inline virtual int32_t get_num_vectors() const
00364 {
00365 if (current_vector)
00366 return 1;
00367 return 0;
00368 }
00369
00375 virtual int32_t get_size() { return sizeof(T); }
00376
00377 private:
00382 void init();
00383
00391 void init(CStreamingFile *file, bool is_labelled, int32_t size);
00392
00393 protected:
00394
00396 float32_t combined_weight;
00397
00399 CInputParser<T> parser;
00400
00402 SGVector<T> current_sgvector;
00403
00405 T* current_vector;
00406
00408 float64_t current_label;
00409
00411 int32_t current_length;
00412 };
00413
00414 template <class T> void CStreamingSimpleFeatures<T>::set_vector_reader()
00415 {
00416 parser.set_read_vector(&CStreamingFile::get_vector);
00417 }
00418
00419 template <class T> void CStreamingSimpleFeatures<T>::set_vector_and_label_reader()
00420 {
00421 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00422 }
00423
00424 #define GET_FEATURE_TYPE(f_type, sg_type) \
00425 template<> inline EFeatureType CStreamingSimpleFeatures<sg_type>::get_feature_type() \
00426 { \
00427 return f_type; \
00428 }
00429
00430 GET_FEATURE_TYPE(F_BOOL, bool)
00431 GET_FEATURE_TYPE(F_CHAR, char)
00432 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00433 GET_FEATURE_TYPE(F_BYTE, int8_t)
00434 GET_FEATURE_TYPE(F_SHORT, int16_t)
00435 GET_FEATURE_TYPE(F_WORD, uint16_t)
00436 GET_FEATURE_TYPE(F_INT, int32_t)
00437 GET_FEATURE_TYPE(F_UINT, uint32_t)
00438 GET_FEATURE_TYPE(F_LONG, int64_t)
00439 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00440 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00441 GET_FEATURE_TYPE(F_DREAL, float64_t)
00442 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00443 #undef GET_FEATURE_TYPE
00444
00445
00446 template <class T>
00447 void CStreamingSimpleFeatures<T>::init()
00448 {
00449 working_file=NULL;
00450 current_vector=NULL;
00451 seekable=false;
00452 current_length=-1;
00453 }
00454
00455 template <class T>
00456 void CStreamingSimpleFeatures<T>::init(CStreamingFile* file,
00457 bool is_labelled,
00458 int32_t size)
00459 {
00460 init();
00461 has_labels = is_labelled;
00462 working_file = file;
00463 parser.init(file, is_labelled, size);
00464 seekable=false;
00465 }
00466
00467 template <class T>
00468 void CStreamingSimpleFeatures<T>::start_parser()
00469 {
00470 if (!parser.is_running())
00471 parser.start_parser();
00472 }
00473
00474 template <class T>
00475 void CStreamingSimpleFeatures<T>::end_parser()
00476 {
00477 parser.end_parser();
00478 }
00479
00480 template <class T>
00481 bool CStreamingSimpleFeatures<T>::get_next_example()
00482 {
00483 bool ret_value;
00484 ret_value = (bool) parser.get_next_example(current_vector,
00485 current_length,
00486 current_label);
00487
00488 return ret_value;
00489 }
00490
00491 template <class T>
00492 SGVector<T> CStreamingSimpleFeatures<T>::get_vector()
00493 {
00494 current_sgvector.vector=current_vector;
00495 current_sgvector.vlen=current_length;
00496
00497 return current_sgvector;
00498 }
00499
00500 template <class T>
00501 float64_t CStreamingSimpleFeatures<T>::get_label()
00502 {
00503 ASSERT(has_labels);
00504
00505 return current_label;
00506 }
00507
00508 template <class T>
00509 void CStreamingSimpleFeatures<T>::release_example()
00510 {
00511 parser.finalize_example();
00512 }
00513
00514 template <class T>
00515 int32_t CStreamingSimpleFeatures<T>::get_dim_feature_space() const
00516 {
00517 return current_length;
00518 }
00519
00520 template <class T>
00521 float32_t CStreamingSimpleFeatures<T>::dot(CStreamingDotFeatures* df)
00522 {
00523 ASSERT(df);
00524 ASSERT(df->get_feature_type() == get_feature_type());
00525 ASSERT(df->get_feature_class() == get_feature_class());
00526 CStreamingSimpleFeatures<T>* sf = (CStreamingSimpleFeatures<T>*) df;
00527
00528 SGVector<T> other_vector=sf->get_vector();
00529
00530 float32_t result = CMath::dot(current_vector, other_vector.vector, current_length);
00531
00532 return result;
00533 }
00534
00535 template <class T>
00536 float32_t CStreamingSimpleFeatures<T>::dot(SGVector<T> sgvec1)
00537 {
00538 int32_t len1;
00539 len1=sgvec1.vlen;
00540
00541 if (len1 != current_length)
00542 SG_ERROR("Lengths %d and %d not equal while computing dot product!\n", len1, current_length);
00543
00544 float32_t result=CMath::dot(current_vector, sgvec1.vector, len1);
00545 return result;
00546 }
00547
00548 template <class T>
00549 int32_t CStreamingSimpleFeatures<T>::get_num_features()
00550 {
00551 return current_length;
00552 }
00553
00554 template <class T>
00555 EFeatureClass CStreamingSimpleFeatures<T>::get_feature_class()
00556 {
00557 return C_STREAMING_SIMPLE;
00558 }
00559
00560 }
00561 #endif // _STREAMING_SIMPLEFEATURES__H__