StreamingSimpleFeatures.cpp

Go to the documentation of this file.
00001 #include <shogun/mathematics/Math.h>
00002 #include <shogun/features/StreamingSimpleFeatures.h>
00003 #include <shogun/io/StreamingFileFromSimpleFeatures.h>
00004 
00005 namespace shogun
00006 {
00007 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures() : CStreamingDotFeatures()
00008 {
00009     set_read_functions();
00010     init();
00011     parser.set_free_vector_after_release(false);
00012 }
00013 
00014 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CStreamingFile* file,
00015              bool is_labelled,
00016              int32_t size)
00017     : CStreamingDotFeatures()
00018 {
00019     init(file, is_labelled, size);
00020     set_read_functions();
00021     parser.set_free_vector_after_release(false);
00022 }
00023 
00024 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CSimpleFeatures<T>* simple_features,
00025              float64_t* lab)
00026     : CStreamingDotFeatures()
00027 {
00028     CStreamingFileFromSimpleFeatures<T>* file;
00029     bool is_labelled;
00030     int32_t size = 1024;
00031 
00032     if (lab)
00033     {
00034         is_labelled = true;
00035         file = new CStreamingFileFromSimpleFeatures<T>(simple_features, lab);
00036     }
00037     else
00038     {
00039         is_labelled = false;
00040         file = new CStreamingFileFromSimpleFeatures<T>(simple_features);
00041     }
00042 
00043     SG_REF(file);
00044 
00045     init(file, is_labelled, size);
00046     set_read_functions();
00047     parser.set_free_vector_after_release(false);
00048     parser.set_free_vectors_on_destruct(false);
00049     seekable=true;
00050 }
00051 
00052 template <class T> CStreamingSimpleFeatures<T>::~CStreamingSimpleFeatures()
00053 {
00054     parser.end_parser();
00055 }
00056 
00057 template <class T> void CStreamingSimpleFeatures<T>::reset_stream()
00058 {
00059     if (seekable)
00060     {
00061         ((CStreamingFileFromSimpleFeatures<T>*) working_file)->reset_stream();
00062         parser.exit_parser();
00063         parser.init(working_file, has_labels, 1);
00064         parser.set_free_vector_after_release(false);
00065         parser.start_parser();
00066     }
00067 }
00068 
00069 template <class T> float32_t CStreamingSimpleFeatures<T>::dense_dot(const float32_t* vec2, int32_t vec2_len)
00070 {
00071     ASSERT(vec2_len==current_length);
00072     float32_t result=0;
00073 
00074     for (int32_t i=0; i<current_length; i++)
00075         result+=current_vector[i]*vec2[i];
00076 
00077     return result;
00078 }
00079 
00080 template <class T> float64_t CStreamingSimpleFeatures<T>::dense_dot(const float64_t* vec2, int32_t vec2_len)
00081 {
00082     ASSERT(vec2_len==current_length);
00083     float64_t result=0;
00084 
00085     for (int32_t i=0; i<current_length; i++)
00086         result+=current_vector[i]*vec2[i];
00087 
00088     return result;
00089 }
00090 
00091 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len , bool abs_val)
00092 {
00093     ASSERT(vec2_len==current_length);
00094 
00095     if (abs_val)
00096     {
00097         for (int32_t i=0; i<current_length; i++)
00098             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00099     }
00100     else
00101     {
00102         for (int32_t i=0; i<current_length; i++)
00103             vec2[i]+=alpha*current_vector[i];
00104     }
00105 }
00106 
00107 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len , bool abs_val)
00108 {
00109     ASSERT(vec2_len==current_length);
00110 
00111     if (abs_val)
00112     {
00113         for (int32_t i=0; i<current_length; i++)
00114             vec2[i]+=alpha*CMath::abs(current_vector[i]);
00115     }
00116     else
00117     {
00118         for (int32_t i=0; i<current_length; i++)
00119             vec2[i]+=alpha*current_vector[i];
00120     }
00121 }
00122 
00123 template <class T> int32_t CStreamingSimpleFeatures<T>::get_nnz_features_for_vector()
00124 {
00125     return current_length;
00126 }
00127 
00128 template <class T> CFeatures* CStreamingSimpleFeatures<T>::duplicate() const
00129 {
00130     return new CStreamingSimpleFeatures<T>(*this);
00131 }
00132 
00133 template <class T> int32_t CStreamingSimpleFeatures<T>::get_num_vectors() const
00134 {
00135     if (current_vector)
00136         return 1;
00137     return 0;
00138 }
00139 
00140 template <class T> int32_t CStreamingSimpleFeatures<T>::get_size()
00141 {
00142     return sizeof(T);
00143 }
00144 
00145 template <class T>
00146 void CStreamingSimpleFeatures<T>::set_vector_reader()
00147 {
00148     parser.set_read_vector(&CStreamingFile::get_vector);
00149 }
00150 
00151 template <class T>
00152 void CStreamingSimpleFeatures<T>::set_vector_and_label_reader()
00153 {
00154     parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00155 }
00156 
00157 #define GET_FEATURE_TYPE(f_type, sg_type)               \
00158 template<> EFeatureType CStreamingSimpleFeatures<sg_type>::get_feature_type() \
00159 {                                   \
00160     return f_type;                          \
00161 }
00162 
00163 GET_FEATURE_TYPE(F_BOOL, bool)
00164 GET_FEATURE_TYPE(F_CHAR, char)
00165 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00166 GET_FEATURE_TYPE(F_BYTE, int8_t)
00167 GET_FEATURE_TYPE(F_SHORT, int16_t)
00168 GET_FEATURE_TYPE(F_WORD, uint16_t)
00169 GET_FEATURE_TYPE(F_INT, int32_t)
00170 GET_FEATURE_TYPE(F_UINT, uint32_t)
00171 GET_FEATURE_TYPE(F_LONG, int64_t)
00172 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00173 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00174 GET_FEATURE_TYPE(F_DREAL, float64_t)
00175 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00176 #undef GET_FEATURE_TYPE
00177 
00178 
00179 template <class T>
00180 void CStreamingSimpleFeatures<T>::init()
00181 {
00182     working_file=NULL;
00183     current_vector=NULL;
00184     seekable=false;
00185     current_length=-1;
00186 }
00187 
00188 template <class T>
00189 void CStreamingSimpleFeatures<T>::init(CStreamingFile* file,
00190                     bool is_labelled,
00191                     int32_t size)
00192 {
00193     init();
00194     has_labels = is_labelled;
00195     working_file = file;
00196     parser.init(file, is_labelled, size);
00197     seekable=false;
00198 }
00199 
00200 template <class T>
00201 void CStreamingSimpleFeatures<T>::start_parser()
00202 {
00203     if (!parser.is_running())
00204         parser.start_parser();
00205 }
00206 
00207 template <class T>
00208 void CStreamingSimpleFeatures<T>::end_parser()
00209 {
00210     parser.end_parser();
00211 }
00212 
00213 template <class T>
00214 bool CStreamingSimpleFeatures<T>::get_next_example()
00215 {
00216     bool ret_value;
00217     ret_value = (bool) parser.get_next_example(current_vector,
00218                            current_length,
00219                            current_label);
00220 
00221     return ret_value;
00222 }
00223 
00224 template <class T>
00225 SGVector<T> CStreamingSimpleFeatures<T>::get_vector()
00226 {
00227     current_sgvector.vector=current_vector;
00228     current_sgvector.vlen=current_length;
00229 
00230     return current_sgvector;
00231 }
00232 
00233 template <class T>
00234 float64_t CStreamingSimpleFeatures<T>::get_label()
00235 {
00236     ASSERT(has_labels);
00237 
00238     return current_label;
00239 }
00240 
00241 template <class T>
00242 void CStreamingSimpleFeatures<T>::release_example()
00243 {
00244     parser.finalize_example();
00245 }
00246 
00247 template <class T>
00248 int32_t CStreamingSimpleFeatures<T>::get_dim_feature_space() const
00249 {
00250     return current_length;
00251 }
00252 
00253 template <class T>
00254     float32_t CStreamingSimpleFeatures<T>::dot(CStreamingDotFeatures* df)
00255 {
00256     ASSERT(df);
00257     ASSERT(df->get_feature_type() == get_feature_type());
00258     ASSERT(df->get_feature_class() == get_feature_class());
00259     CStreamingSimpleFeatures<T>* sf = (CStreamingSimpleFeatures<T>*) df;
00260 
00261     SGVector<T> other_vector=sf->get_vector();
00262 
00263     float32_t result = CMath::dot(current_vector, other_vector.vector, current_length);
00264 
00265     return result;
00266 }
00267 
00268 template <class T>
00269 float32_t CStreamingSimpleFeatures<T>::dot(SGVector<T> sgvec1)
00270 {
00271     int32_t len1;
00272     len1=sgvec1.vlen;
00273 
00274     if (len1 != current_length)
00275         SG_ERROR("Lengths %d and %d not equal while computing dot product!\n", len1, current_length);
00276 
00277     float32_t result=CMath::dot(current_vector, sgvec1.vector, len1);
00278     return result;
00279 }
00280 
00281 template <class T>
00282 int32_t CStreamingSimpleFeatures<T>::get_num_features()
00283 {
00284     return current_length;
00285 }
00286 
00287 template <class T>
00288 EFeatureClass CStreamingSimpleFeatures<T>::get_feature_class()
00289 {
00290     return C_STREAMING_SIMPLE;
00291 }
00292 
00293 template class CStreamingSimpleFeatures<bool>;
00294 template class CStreamingSimpleFeatures<char>;
00295 template class CStreamingSimpleFeatures<int8_t>;
00296 template class CStreamingSimpleFeatures<uint8_t>;
00297 template class CStreamingSimpleFeatures<int16_t>;
00298 template class CStreamingSimpleFeatures<uint16_t>;
00299 template class CStreamingSimpleFeatures<int32_t>;
00300 template class CStreamingSimpleFeatures<uint32_t>;
00301 template class CStreamingSimpleFeatures<int64_t>;
00302 template class CStreamingSimpleFeatures<uint64_t>;
00303 template class CStreamingSimpleFeatures<float32_t>;
00304 template class CStreamingSimpleFeatures<float64_t>;
00305 template class CStreamingSimpleFeatures<floatmax_t>;
00306 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation