SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
StreamingHashedSparseFeatures.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2013 Evangelos Anagnostopoulos
8  * Copyright (C) 2013 Evangelos Anagnostopoulos
9  */
10 
14 
15 namespace shogun
16 {
17 
18 template <class ST>
20 {
21  init(NULL, false, 0, 0, false, true);
22 }
23 
24 template <class ST>
26  bool is_labelled, int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
27 {
28  init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
29 }
30 
31 template <class ST>
33  int32_t d, bool use_quadr, bool keep_lin_terms, float64_t* lab)
34 {
35  ASSERT(dot_features);
36 
38  new CStreamingFileFromSparseFeatures<ST>(dot_features, lab);
39  bool is_labelled = (lab != NULL);
40  int32_t size = 1024;
41 
42  init(file, is_labelled, size, d, use_quadr, keep_lin_terms);
43 
44  parser.set_free_vectors_on_destruct(false);
45  seekable=true;
46 }
47 
48 template <class ST>
50 {
51 }
52 
// Common set-up routine; the three bodies earlier in this listing each end up
// calling it (presumably the class constructors - their signature lines are
// missing from this extraction).
//
// file           - streaming source; may be null, in which case the parser is
//                  not initialised here
// is_labelled    - stored in has_labels and forwarded to parser.init()
// size           - forwarded to parser.init() (meaning not visible here)
// d              - stored in dim, the target dimension
// use_quadr      - stored in use_quadratic
// keep_lin_terms - stored in keep_linear_terms
//
// NOTE(review): the leading numbers are the listing's own line numbers.
// Listing lines 64 and 66 (the closing arguments of the second and third
// SG_ADD calls) are absent from this extraction.
53 template <class ST>
54 void CStreamingHashedSparseFeatures<ST>::init(CStreamingFile* file, bool is_labelled,
55  int32_t size, int32_t d, bool use_quadr, bool keep_lin_terms)
56 {
// Record the target dimension and register it as a parameter via SG_ADD.
57  dim = d;
58  SG_ADD(&dim, "dim", "Size of target dimension", MS_NOT_AVAILABLE);
59 
60  use_quadratic = use_quadr;
61  keep_linear_terms = keep_lin_terms;
62 
// Register both flags as parameters (call tails truncated, see note above).
63  SG_ADD(&use_quadratic, "use_quadratic", "Whether to use quadratic features",
65  SG_ADD(&keep_linear_terms, "keep_linear_terms", "Whether to keep the linear terms or not",
67 
68  has_labels = is_labelled;
// With a file: remember it, take a reference with SG_REF, hand it to the
// parser, and mark the stream as not seekable.
69  if (file)
70  {
71  working_file = file;
72  SG_REF(working_file);
73  parser.init(file, is_labelled, size);
74  seekable = false;
75  }
// NOTE(review): on this branch `file` is already null and is a by-value
// parameter, so the assignment below has no observable effect. Possibly
// `working_file` was meant - confirm before changing.
76  else
77  file = NULL;
78 
// Install the parser read callbacks (set_read_functions is defined outside
// this view).
79  set_read_functions();
// presumably tells the parser not to free a vector once an example is
// released - TODO confirm against the parser documentation
80  parser.set_free_vector_after_release(false);
81 
82  set_generic<ST>();
83 }
84 
85 template <class ST>
87 {
88  ASSERT(df);
89  ASSERT(df->get_feature_type() == get_feature_type())
90  ASSERT(strcmp(df->get_name(),get_name())==0)
91 
93  return current_vector.sparse_dot(hdf->current_vector);
94 }
95 
// NOTE(review): the signature line (listing line 97) is missing from this
// extraction. The parameters used below match the
// dense_dot(const float32_t *vec2, int32_t vec2_len) entry in the reference
// list at the end of the page - confirm against the real source.
//
// Sums vec2[feat_index] * entry over every entry of current_vector and
// returns that sum as a float32_t.
96 template <class ST>
98 {
// The dense vector must have exactly `dim` elements.
99  ASSERT(vec2_len == dim);
100 
101  float32_t result = 0;
// Only the stored (non-zero) entries of the sparse vector are visited.
102  for (index_t i=0; i<current_vector.num_feat_entries; i++)
103  result += vec2[current_vector.features[i].feat_index] * current_vector.features[i].entry;
104 
105  return result;
106 }
107 
// NOTE(review): the first signature line (listing line 109) is missing from
// this extraction. The surviving parameter tail and the
// add_to_dense_vec(float32_t alpha, float32_t *vec2, int32_t vec2_len,
// bool abs_val=false) entry in the reference list at the end of the page
// identify this body - confirm against the real source.
//
// Adds alpha * entry to vec2[feat_index] for every entry of current_vector.
108 template <class ST>
110  int32_t vec2_len, bool abs_val)
111 {
// The dense vector must have exactly `dim` elements.
112  ASSERT(vec2_len == dim);
113 
// When abs_val is set, the scaling factor alpha is replaced by its absolute
// value; the entries themselves are not modified.
114  if (abs_val)
115  alpha = CMath::abs(alpha);
116 
117  for (index_t i=0; i<current_vector.num_feat_entries; i++)
118  vec2[current_vector.features[i].feat_index] += alpha * current_vector.features[i].entry;
119 }
120 
121 template <class ST>
123 {
124  return dim;
125 }
126 
127 template <class ST>
129 {
130  return "StreamingHashedSparseFeatures";
131 }
132 
133 template <class ST>
135 {
136  return 1;
137 }
138 
139 template <class ST>
141 {
142  return new CStreamingHashedSparseFeatures<ST>(*this);
143 }
144 
145 template <class ST>
147 {
148  SG_DEBUG("called inside set_vector_reader\n");
149  parser.set_read_vector(&CStreamingFile::get_sparse_vector);
150 }
151 
152 template <class ST>
154 {
155  parser.set_read_vector_and_label(&CStreamingFile::get_sparse_vector_and_label);
156 }
157 
158 template <class ST>
160 {
161  return F_UINT;
162 }
163 
164 template <class ST>
166 {
167  return C_STREAMING_SPARSE;
168 }
169 
170 template <class ST>
172 {
173  if (!parser.is_running())
174  parser.start_parser();
175 }
176 
177 template <class ST>
179 {
180  parser.end_parser();
181 }
182 
183 template <class ST>
185 {
186  return current_label;
187 }
188 
189 template <class ST>
191 {
192  SGSparseVector<ST> tmp;
193  if (parser.get_next_example(tmp.features,
194  tmp.num_feat_entries, current_label))
195  {
196  current_vector = CHashedSparseFeatures<ST>::hash_vector(tmp, dim,
197  use_quadratic, keep_linear_terms);
198  tmp.features = NULL;
199  tmp.num_feat_entries = -1;
200  return true;
201  }
202  return false;
203 }
204 
205 template <class ST>
207 {
208  parser.finalize_example();
209 }
210 
211 template <class ST>
213 {
214  return dim;
215 }
216 
217 template <class ST>
219 {
220  return current_vector;
221 }
222 
236 }
virtual const char * get_name() const =0
virtual void add_to_dense_vec(float32_t alpha, float32_t *vec2, int32_t vec2_len, bool abs_val=false)
int32_t index_t
Definition: common.h:62
virtual float32_t dense_dot(const float32_t *vec2, int32_t vec2_len)
Template class SparseFeatures implements sparse matrices.
This class acts as an alternative to the CStreamingSparseFeatures class and their difference is that ...
#define SG_REF(x)
Definition: SGObject.h:51
EFeatureClass
shogun feature class
Definition: FeatureTypes.h:38
A Streaming File access class.
Definition: StreamingFile.h:34
virtual float32_t dot(CStreamingDotFeatures *df)
#define ASSERT(x)
Definition: SGIO.h:201
double float64_t
Definition: common.h:50
virtual void get_sparse_vector_and_label(SGSparseVectorEntry< bool > *&vector, int32_t &len, float64_t &label)
Streaming features that support dot products among other operations.
SGSparseVectorEntry< T > * features
float float32_t
Definition: common.h:49
EFeatureType
shogun feature type
Definition: FeatureTypes.h:19
#define SG_DEBUG(...)
Definition: SGIO.h:107
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual void get_sparse_vector(SGSparseVectorEntry< bool > *&vector, int32_t &len)
The class Features is the base class of all feature objects.
Definition: Features.h:68
#define SG_ADD(...)
Definition: SGObject.h:81
Class CStreamingFileFromSparseFeatures is derived from CStreamingFile and provides an input source fo...
static SGSparseVector< ST > hash_vector(SGVector< ST > vec, int32_t dim, bool use_quadratic=false, bool keep_linear_terms=true)
virtual EFeatureType get_feature_type() const =0
static T abs(T a)
Definition: Math.h:179

SHOGUN Machine Learning Toolbox - Documentation