Go to the documentation of this file.00001 #include <shogun/mathematics/Math.h>
00002 #include <shogun/features/StreamingSimpleFeatures.h>
00003 #include <shogun/io/StreamingFileFromSimpleFeatures.h>
00004
00005 namespace shogun
00006 {
00007 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures() : CStreamingDotFeatures()
00008 {
00009 set_read_functions();
00010 init();
00011 parser.set_free_vector_after_release(false);
00012 }
00013
00014 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CStreamingFile* file,
00015 bool is_labelled,
00016 int32_t size)
00017 : CStreamingDotFeatures()
00018 {
00019 init(file, is_labelled, size);
00020 set_read_functions();
00021 parser.set_free_vector_after_release(false);
00022 }
00023
00024 template <class T> CStreamingSimpleFeatures<T>::CStreamingSimpleFeatures(CSimpleFeatures<T>* simple_features,
00025 float64_t* lab)
00026 : CStreamingDotFeatures()
00027 {
00028 CStreamingFileFromSimpleFeatures<T>* file;
00029 bool is_labelled;
00030 int32_t size = 1024;
00031
00032 if (lab)
00033 {
00034 is_labelled = true;
00035 file = new CStreamingFileFromSimpleFeatures<T>(simple_features, lab);
00036 }
00037 else
00038 {
00039 is_labelled = false;
00040 file = new CStreamingFileFromSimpleFeatures<T>(simple_features);
00041 }
00042
00043 SG_REF(file);
00044
00045 init(file, is_labelled, size);
00046 set_read_functions();
00047 parser.set_free_vector_after_release(false);
00048 parser.set_free_vectors_on_destruct(false);
00049 seekable=true;
00050 }
00051
00052 template <class T> CStreamingSimpleFeatures<T>::~CStreamingSimpleFeatures()
00053 {
00054 parser.end_parser();
00055 }
00056
00057 template <class T> void CStreamingSimpleFeatures<T>::reset_stream()
00058 {
00059 if (seekable)
00060 {
00061 ((CStreamingFileFromSimpleFeatures<T>*) working_file)->reset_stream();
00062 parser.exit_parser();
00063 parser.init(working_file, has_labels, 1);
00064 parser.set_free_vector_after_release(false);
00065 parser.start_parser();
00066 }
00067 }
00068
00069 template <class T> float32_t CStreamingSimpleFeatures<T>::dense_dot(const float32_t* vec2, int32_t vec2_len)
00070 {
00071 ASSERT(vec2_len==current_length);
00072 float32_t result=0;
00073
00074 for (int32_t i=0; i<current_length; i++)
00075 result+=current_vector[i]*vec2[i];
00076
00077 return result;
00078 }
00079
00080 template <class T> float64_t CStreamingSimpleFeatures<T>::dense_dot(const float64_t* vec2, int32_t vec2_len)
00081 {
00082 ASSERT(vec2_len==current_length);
00083 float64_t result=0;
00084
00085 for (int32_t i=0; i<current_length; i++)
00086 result+=current_vector[i]*vec2[i];
00087
00088 return result;
00089 }
00090
00091 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len , bool abs_val)
00092 {
00093 ASSERT(vec2_len==current_length);
00094
00095 if (abs_val)
00096 {
00097 for (int32_t i=0; i<current_length; i++)
00098 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00099 }
00100 else
00101 {
00102 for (int32_t i=0; i<current_length; i++)
00103 vec2[i]+=alpha*current_vector[i];
00104 }
00105 }
00106
00107 template <class T> void CStreamingSimpleFeatures<T>::add_to_dense_vec(float64_t alpha, float64_t* vec2, int32_t vec2_len , bool abs_val)
00108 {
00109 ASSERT(vec2_len==current_length);
00110
00111 if (abs_val)
00112 {
00113 for (int32_t i=0; i<current_length; i++)
00114 vec2[i]+=alpha*CMath::abs(current_vector[i]);
00115 }
00116 else
00117 {
00118 for (int32_t i=0; i<current_length; i++)
00119 vec2[i]+=alpha*current_vector[i];
00120 }
00121 }
00122
00123 template <class T> int32_t CStreamingSimpleFeatures<T>::get_nnz_features_for_vector()
00124 {
00125 return current_length;
00126 }
00127
00128 template <class T> CFeatures* CStreamingSimpleFeatures<T>::duplicate() const
00129 {
00130 return new CStreamingSimpleFeatures<T>(*this);
00131 }
00132
00133 template <class T> int32_t CStreamingSimpleFeatures<T>::get_num_vectors() const
00134 {
00135 if (current_vector)
00136 return 1;
00137 return 0;
00138 }
00139
00140 template <class T> int32_t CStreamingSimpleFeatures<T>::get_size()
00141 {
00142 return sizeof(T);
00143 }
00144
00145 template <class T>
00146 void CStreamingSimpleFeatures<T>::set_vector_reader()
00147 {
00148 parser.set_read_vector(&CStreamingFile::get_vector);
00149 }
00150
00151 template <class T>
00152 void CStreamingSimpleFeatures<T>::set_vector_and_label_reader()
00153 {
00154 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00155 }
00156
00157 #define GET_FEATURE_TYPE(f_type, sg_type) \
00158 template<> EFeatureType CStreamingSimpleFeatures<sg_type>::get_feature_type() \
00159 { \
00160 return f_type; \
00161 }
00162
00163 GET_FEATURE_TYPE(F_BOOL, bool)
00164 GET_FEATURE_TYPE(F_CHAR, char)
00165 GET_FEATURE_TYPE(F_BYTE, uint8_t)
00166 GET_FEATURE_TYPE(F_BYTE, int8_t)
00167 GET_FEATURE_TYPE(F_SHORT, int16_t)
00168 GET_FEATURE_TYPE(F_WORD, uint16_t)
00169 GET_FEATURE_TYPE(F_INT, int32_t)
00170 GET_FEATURE_TYPE(F_UINT, uint32_t)
00171 GET_FEATURE_TYPE(F_LONG, int64_t)
00172 GET_FEATURE_TYPE(F_ULONG, uint64_t)
00173 GET_FEATURE_TYPE(F_SHORTREAL, float32_t)
00174 GET_FEATURE_TYPE(F_DREAL, float64_t)
00175 GET_FEATURE_TYPE(F_LONGREAL, floatmax_t)
00176 #undef GET_FEATURE_TYPE
00177
00178
00179 template <class T>
00180 void CStreamingSimpleFeatures<T>::init()
00181 {
00182 working_file=NULL;
00183 current_vector=NULL;
00184 seekable=false;
00185 current_length=-1;
00186 }
00187
00188 template <class T>
00189 void CStreamingSimpleFeatures<T>::init(CStreamingFile* file,
00190 bool is_labelled,
00191 int32_t size)
00192 {
00193 init();
00194 has_labels = is_labelled;
00195 working_file = file;
00196 parser.init(file, is_labelled, size);
00197 seekable=false;
00198 }
00199
00200 template <class T>
00201 void CStreamingSimpleFeatures<T>::start_parser()
00202 {
00203 if (!parser.is_running())
00204 parser.start_parser();
00205 }
00206
00207 template <class T>
00208 void CStreamingSimpleFeatures<T>::end_parser()
00209 {
00210 parser.end_parser();
00211 }
00212
00213 template <class T>
00214 bool CStreamingSimpleFeatures<T>::get_next_example()
00215 {
00216 bool ret_value;
00217 ret_value = (bool) parser.get_next_example(current_vector,
00218 current_length,
00219 current_label);
00220
00221 return ret_value;
00222 }
00223
00224 template <class T>
00225 SGVector<T> CStreamingSimpleFeatures<T>::get_vector()
00226 {
00227 current_sgvector.vector=current_vector;
00228 current_sgvector.vlen=current_length;
00229
00230 return current_sgvector;
00231 }
00232
00233 template <class T>
00234 float64_t CStreamingSimpleFeatures<T>::get_label()
00235 {
00236 ASSERT(has_labels);
00237
00238 return current_label;
00239 }
00240
00241 template <class T>
00242 void CStreamingSimpleFeatures<T>::release_example()
00243 {
00244 parser.finalize_example();
00245 }
00246
00247 template <class T>
00248 int32_t CStreamingSimpleFeatures<T>::get_dim_feature_space() const
00249 {
00250 return current_length;
00251 }
00252
00253 template <class T>
00254 float32_t CStreamingSimpleFeatures<T>::dot(CStreamingDotFeatures* df)
00255 {
00256 ASSERT(df);
00257 ASSERT(df->get_feature_type() == get_feature_type());
00258 ASSERT(df->get_feature_class() == get_feature_class());
00259 CStreamingSimpleFeatures<T>* sf = (CStreamingSimpleFeatures<T>*) df;
00260
00261 SGVector<T> other_vector=sf->get_vector();
00262
00263 float32_t result = CMath::dot(current_vector, other_vector.vector, current_length);
00264
00265 return result;
00266 }
00267
00268 template <class T>
00269 float32_t CStreamingSimpleFeatures<T>::dot(SGVector<T> sgvec1)
00270 {
00271 int32_t len1;
00272 len1=sgvec1.vlen;
00273
00274 if (len1 != current_length)
00275 SG_ERROR("Lengths %d and %d not equal while computing dot product!\n", len1, current_length);
00276
00277 float32_t result=CMath::dot(current_vector, sgvec1.vector, len1);
00278 return result;
00279 }
00280
00281 template <class T>
00282 int32_t CStreamingSimpleFeatures<T>::get_num_features()
00283 {
00284 return current_length;
00285 }
00286
00287 template <class T>
00288 EFeatureClass CStreamingSimpleFeatures<T>::get_feature_class()
00289 {
00290 return C_STREAMING_SIMPLE;
00291 }
00292
00293 template class CStreamingSimpleFeatures<bool>;
00294 template class CStreamingSimpleFeatures<char>;
00295 template class CStreamingSimpleFeatures<int8_t>;
00296 template class CStreamingSimpleFeatures<uint8_t>;
00297 template class CStreamingSimpleFeatures<int16_t>;
00298 template class CStreamingSimpleFeatures<uint16_t>;
00299 template class CStreamingSimpleFeatures<int32_t>;
00300 template class CStreamingSimpleFeatures<uint32_t>;
00301 template class CStreamingSimpleFeatures<int64_t>;
00302 template class CStreamingSimpleFeatures<uint64_t>;
00303 template class CStreamingSimpleFeatures<float32_t>;
00304 template class CStreamingSimpleFeatures<float64_t>;
00305 template class CStreamingSimpleFeatures<floatmax_t>;
00306 }