SimpleFeatures.h

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2010 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  * Copyright (C) 2010 Berlin Institute of Technology
00011  */
00012 
00013 #ifndef _SIMPLEFEATURES__H__
00014 #define _SIMPLEFEATURES__H__
00015 
00016 #include <shogun/lib/common.h>
00017 #include <shogun/lib/Cache.h>
00018 #include <shogun/io/File.h>
00019 #include <shogun/features/DotFeatures.h>
00020 #include <shogun/features/StringFeatures.h>
00021 #include <shogun/lib/DataType.h>
00022 
00023 namespace shogun {
// Forward declarations of the types named by the declarations that follow in
// this header: CStringFeatures<char> appears as a parameter type, SGMatrix<ST>
// as a parameter/return type, and CDotFeatures as the base class of
// CSimpleFeatures.
// NOTE(review): a base class must be a complete type, so the CDotFeatures
// definition presumably comes from <shogun/features/DotFeatures.h> included
// above rather than from this forward declaration -- confirm.
00024 template<class ST> class CStringFeatures;
00025 template<class ST> class CSimpleFeatures;
00026 template<class ST> class SGMatrix;
00027 class CDotFeatures;
00028 
/**
 * Class template CSimpleFeatures<ST>, publicly derived from CDotFeatures.
 *
 * The protected data members at the end of the class are a raw ST* named
 * feature_matrix, four int32_t counters (num_vectors, num_features,
 * feature_matrix_num_vectors, feature_matrix_num_features) and a CCache<ST>*
 * named feature_cache.
 *
 * NOTE(review): this header contains declarations only. Wherever a comment
 * below describes what a member "presumably" does, that is inferred from the
 * name and signature and must be confirmed against the implementation.
 */
00058 template<class ST> class CSimpleFeatures: public CDotFeatures
00059 {
00060 public:
    /**
     * Constructor taking an int32_t; because the argument defaults to 0 this
     * also acts as the default constructor. It is not declared explicit, so
     * an int32_t converts implicitly to CSimpleFeatures<ST>.
     * NOTE(review): `size` is presumably a cache size -- confirm.
     */
00065     CSimpleFeatures(int32_t size = 0);
00066 
    /** Copy constructor. */
00068     CSimpleFeatures(const CSimpleFeatures & orig);
00069 
    /** Constructor taking an SGMatrix<ST> by value. */
00074     CSimpleFeatures(SGMatrix<ST> matrix);
00075 
    /**
     * Constructor taking a raw ST pointer plus two int32_t dimensions.
     * NOTE(review): whether `src` is copied or adopted (ownership) is not
     * visible in this header -- confirm before passing stack/shared memory.
     */
00082     CSimpleFeatures(ST* src, int32_t num_feat, int32_t num_vec);
00083 
    /**
     * Constructor taking a CFile pointer.
     * NOTE(review): presumably reads the features from `loader` -- confirm.
     */
00088     CSimpleFeatures(CFile* loader);
00089 
    /** Virtual const member returning a CFeatures*; presumably a copy of this object -- confirm. */
00094     virtual CFeatures* duplicate() const;
00095 
    /** Virtual destructor. */
00096     virtual ~CSimpleFeatures();
00097 
    /** Non-virtual; presumably releases feature_matrix only -- confirm. */
00102     void free_feature_matrix();
00103 
    /** Non-virtual; presumably releases feature_matrix and feature_cache -- confirm. */
00108     void free_features();
00109 
    /**
     * Raw-pointer accessor for one vector. `len` and `dofree` are non-const
     * references, i.e. they can be written by the callee.
     * NOTE(review): presumably `dofree` tells the caller whether the returned
     * buffer must be handed back via free_feature_vector() -- confirm.
     */
00121     ST* get_feature_vector(int32_t num, int32_t& len, bool& dofree);
00122 
    /** Takes an SGVector<ST> by value and an int32_t index `num`. */
00130     void set_feature_vector(SGVector<ST> vector, int32_t num);
00131 
    /** SGVector<ST>-returning overload of get_feature_vector(). */
00139     SGVector<ST> get_feature_vector(int32_t num);
00140 
    /** Counterpart of the raw-pointer get_feature_vector() overload (same ST*, num, dofree triple). */
00149     void free_feature_vector(ST* feat_vec, int32_t num, bool dofree);
00150 
    /** Counterpart of the SGVector<ST> get_feature_vector() overload. */
00158     void free_feature_vector(SGVector<ST> vec, int32_t num);
00159 
    /**
     * Takes an int32_t index array and its length.
     * NOTE(review): presumably restricts the stored vectors to `idx` -- confirm.
     */
00173     void vector_subset(int32_t* idx, int32_t idx_len);
00174 
    /**
     * Takes an int32_t index array and its length.
     * NOTE(review): presumably restricts the stored features to `idx` -- confirm.
     */
00188     void feature_subset(int32_t* idx, int32_t idx_len);
00189 
    /**
     * Overload with three pointer parameters (ST**, int32_t*, int32_t*).
     * NOTE(review): these look like output parameters receiving a matrix and
     * its dimensions; who owns *dst afterwards is not visible here -- confirm.
     */
00199     void get_feature_matrix(ST** dst, int32_t* num_feat, int32_t* num_vec);
00200 
    /** Overload returning an SGMatrix<ST> by value. */
00207     SGMatrix<ST> get_feature_matrix();
00208 
    /** Returns an SGMatrix<ST>; presumably hands the matrix over and detaches it from this object -- confirm. */
00215     SGMatrix<ST> steal_feature_matrix();
00216 
    /** Non-virtual overload taking an SGMatrix<ST> by value. */
00223     void set_feature_matrix(SGMatrix<ST> matrix);
00224 
    /** Overload returning a raw ST*; the two int32_t references can be written by the callee. */
00234     ST* get_feature_matrix(int32_t &num_feat, int32_t &num_vec);
00235 
    /** Returns a CSimpleFeatures<ST>*; presumably a newly created transposed object -- confirm ownership. */
00242     CSimpleFeatures<ST>* get_transposed();
00243 
    /** Raw-pointer overload of get_transposed(); the two int32_t references can be written by the callee. */
00255     ST* get_transposed(int32_t &num_feat, int32_t &num_vec);
00256 
    /**
     * Virtual overload taking a raw ST* and two dimensions.
     * NOTE(review): whether `fm` is adopted or copied is not visible here;
     * compare with copy_feature_matrix() below -- confirm.
     */
00269     virtual void set_feature_matrix(ST* fm, int32_t num_feat, int32_t num_vec);
00270 
    /** Virtual; takes an SGMatrix<ST> by value. Presumably stores a copy of `src` -- confirm. */
00280     virtual void copy_feature_matrix(SGMatrix<ST> src);
00281 
    /** Takes a CDotFeatures*; presumably fills this object from `df` -- confirm. */
00288     void obtain_from_dot(CDotFeatures* df);
00289 
    /** Virtual; returns bool. `force_preprocessing` defaults to false. */
00300     virtual bool apply_preprocessor(bool force_preprocessing = false);
00301 
    /** Virtual; returns an int32_t. NOTE(review): presumably the size of one element -- confirm. */
00306     virtual int32_t get_size();
00307 
    /** Virtual const getter; presumably returns num_vectors -- confirm. */
00312     virtual int32_t get_num_vectors() const;
00313 
    /** Non-virtual, non-const getter; presumably returns num_features -- confirm. */
00318     int32_t get_num_features();
00319 
    /** Non-virtual setter taking an int32_t; presumably assigns num_features -- confirm. */
00324     void set_num_features(int32_t num);
00325 
    /** Non-virtual setter taking an int32_t; presumably assigns num_vectors -- confirm. */
00332     void set_num_vectors(int32_t num);
00333 
    /** Non-virtual; presumably sets up feature_cache -- confirm. */
00338     void initialize_cache();
00339 
    /** Virtual; returns an EFeatureClass value. */
00344     virtual EFeatureClass get_feature_class();
00345 
    /** Virtual; returns an EFeatureType value. */
00350     virtual EFeatureType get_feature_type();
00351 
    /** Virtual; takes new feature/vector counts and returns bool (presumably success -- confirm). */
00360     virtual bool reshape(int32_t p_num_features, int32_t p_num_vectors);
00361 
    /** Virtual const member returning an int32_t. */
00369     virtual int32_t get_dim_feature_space() const;
00370 
    /** Virtual; takes an index, a CDotFeatures* and a second index, returns float64_t. */
00380     virtual float64_t dot(int32_t vec_idx1, CDotFeatures* df,
00381             int32_t vec_idx2);
00382 
    /** Virtual; takes an index plus a const float64_t array and its length, returns float64_t. */
00391     virtual float64_t dense_dot(int32_t vec_idx1, const float64_t* vec2,
00392             int32_t vec2_len);
00393 
    /**
     * Virtual; `vec2` is a non-const float64_t array, so it can be modified.
     * `abs_val` defaults to false.
     * NOTE(review): presumably adds alpha times vector `vec_idx1` into vec2 -- confirm.
     */
00404     virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1,
00405             float64_t* vec2, int32_t vec2_len, bool abs_val = false);
00406 
    /** Virtual; takes an index and returns an int32_t. */
00412     virtual int32_t get_nnz_features_for_vector(int32_t num);
00413 
    /** Virtual; takes two CStringFeatures<char>* and a float64_t gap cost, returns bool. */
00421     virtual bool Align_char_features(CStringFeatures<char>* cf,
00422             CStringFeatures<char>* Ref, float64_t gapCost);
00423 
    /** Virtual; takes a CFile*. */
00428     virtual void load(CFile* loader);
00429 
    /** Virtual; takes a CFile*. */
00434     virtual void save(CFile* saver);
00435 
    // The iterator state struct is only declared when DOXYGEN_SHOULD_SKIP_THIS
    // is not defined.
00436     #ifndef DOXYGEN_SHOULD_SKIP_THIS
00437 
    // NOTE(review): field meanings are inferred from their names and from the
    // (ST*, num, dofree) triple of free_feature_vector() -- confirm in the
    // implementation of get_feature_iterator()/get_next_feature().
00438     struct simple_feature_iterator
00439     {
        // presumably the vector being iterated
00441         ST* vec;
        // presumably the index of that vector
00443         int32_t vidx;
        // presumably the length of vec
00445         int32_t vlen;
        // presumably whether vec must be freed afterwards
00447         bool vfree;
00448 
        // presumably the current position inside vec
00450         int32_t index;
00451     };
00452     #endif
00453 
    /**
     * Virtual; returns an opaque void* for the given vector index.
     * NOTE(review): presumably points to a simple_feature_iterator and must be
     * released with free_feature_iterator() -- confirm.
     */
00465     virtual void* get_feature_iterator(int32_t vector_index);
00466 
    /** Virtual; `index` and `value` are non-const references the callee can write; returns bool. */
00479     virtual bool get_next_feature(int32_t& index, float64_t& value,
00480             void* iterator);
00481 
    /** Virtual; takes the opaque void* iterator. */
00487     virtual void free_feature_iterator(void* iterator);
00488 
    /** Virtual; takes an SGVector<index_t> by value and returns a CFeatures*. */
00497     virtual CFeatures* copy_subset(SGVector<index_t> indices);
00498 
    /** Returns the string literal "SimpleFeatures". */
00500     inline virtual const char* get_name() const { return "SimpleFeatures"; }
00501 
00502 protected:
    /**
     * Protected virtual hook returning an ST*; `len` is a non-const reference
     * and `target` defaults to NULL.
     * NOTE(review): presumably invoked when a vector is not available from
     * feature_matrix -- confirm.
     */
00514     virtual ST* compute_feature_vector(int32_t num, int32_t& len,
00515             ST* target = NULL);
00516 
00517 private:
    // Private helper with no parameters and no return value; its body is not
    // visible in this header.
00518     void init();
00519 
00520 protected:
    // int32_t counter; see get_num_vectors()/set_num_vectors().
00522     int32_t num_vectors;
00523 
    // int32_t counter; see get_num_features()/set_num_features().
00525     int32_t num_features;
00526 
    // Raw pointer to ST elements. NOTE(review): memory layout and ownership
    // are not visible in this header -- confirm.
00531     ST* feature_matrix;
00532 
    // int32_t counter kept separately from num_vectors.
00534     int32_t feature_matrix_num_vectors;
00535 
    // int32_t counter kept separately from num_features.
00537     int32_t feature_matrix_num_features;
00538 
    // Pointer to a CCache<ST>; see initialize_cache().
00540     CCache<ST>* feature_cache;
00541 };
00542 }
00543 #endif // _SIMPLEFEATURES__H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation