Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
#include <shogun/features/StreamingVwFeatures.h>

#include <cmath>
00017
00018 using namespace shogun;
00019
00020 void CStreamingVwFeatures::set_vector_reader()
00021 {
00022 parser.set_read_vector(&CStreamingFile::get_vector);
00023 }
00024
00025 void CStreamingVwFeatures::set_vector_and_label_reader()
00026 {
00027 parser.set_read_vector_and_label(&CStreamingFile::get_vector_and_label);
00028 }
00029
00030 inline EFeatureType CStreamingVwFeatures::get_feature_type()
00031 {
00032 return F_DREAL;
00033 }
00034
00035 void CStreamingVwFeatures::init()
00036 {
00037 working_file=NULL;
00038 seekable=false;
00039 current_length=-1;
00040 current_example=NULL;
00041
00042 example_count = 0;
00043 }
00044
00045 void CStreamingVwFeatures::init(CStreamingVwFile* file, bool is_labelled, int32_t size)
00046 {
00047 init();
00048 has_labels = is_labelled;
00049 working_file = file;
00050 parser.init(file, is_labelled, size);
00051 parser.set_free_vector_after_release(false);
00052 seekable=false;
00053
00054
00055 env = ((CStreamingVwFile*) file)->get_env();
00056 SG_REF(env);
00057 }
00058
00059 void CStreamingVwFeatures::init(CStreamingVwCacheFile* file, bool is_labelled, int32_t size)
00060 {
00061 init();
00062 has_labels = is_labelled;
00063 working_file = file;
00064 parser.init(file, is_labelled, size);
00065 parser.set_free_vector_after_release(false);
00066 seekable=true;
00067
00068
00069 env = ((CStreamingVwCacheFile*) file)->get_env();
00070 SG_REF(env);
00071 }
00072
00073 void CStreamingVwFeatures::setup_example(VwExample* ae)
00074 {
00075 ae->pass = env->passes_complete;
00076 ae->num_features = 0;
00077 ae->total_sum_feat_sq = 1;
00078 ae->example_counter = ++example_count;
00079 ae->global_weight = ae->ld->weight;
00080 env->t += ae->global_weight;
00081 ae->example_t = env->t;
00082
00083
00084 if (env->ignore_some)
00085 {
00086 for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
00087 if (env->ignore[*i])
00088 {
00089 ae->atomics[*i].erase();
00090 memmove(i,i+1,(ae->indices.end - (i+1))*sizeof(vw_size_t));
00091 ae->indices.end--;
00092 i--;
00093 }
00094 }
00095
00096
00097 vw_size_t constant_namespace = 128;
00098 VwFeature temp = {1,constant_hash & env->mask};
00099 ae->indices.push(constant_namespace);
00100 ae->atomics[constant_namespace].push(temp);
00101 ae->sum_feat_sq[constant_namespace] = 0;
00102
00103 if(env->stride != 1)
00104 {
00105
00106 vw_size_t stride = env->stride;
00107 for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
00108 for(VwFeature* j = ae->atomics[*i].begin; j != ae->atomics[*i].end; j++)
00109 j->weight_index = j->weight_index*stride;
00110 }
00111
00112 for (vw_size_t* i = ae->indices.begin; i != ae->indices.end; i++)
00113 {
00114 ae->num_features += ae->atomics[*i].end - ae->atomics[*i].begin;
00115 ae->total_sum_feat_sq += ae->sum_feat_sq[*i];
00116 }
00117
00118
00119 for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
00120 {
00121 char* i = env->pairs.get_element(k);
00122
00123 ae->num_features
00124 += (ae->atomics[(int32_t)(i[0])].end - ae->atomics[(int32_t)(i[0])].begin)
00125 *(ae->atomics[(int32_t)(i[1])].end - ae->atomics[(int32_t)(i[1])].begin);
00126
00127 ae->total_sum_feat_sq += ae->sum_feat_sq[(int32_t)(i[0])]*ae->sum_feat_sq[(int32_t)(i[1])];
00128 }
00129 }
00130
00131 void CStreamingVwFeatures::start_parser()
00132 {
00133 if (!parser.is_running())
00134 parser.start_parser();
00135 }
00136
00137 void CStreamingVwFeatures::end_parser()
00138 {
00139 parser.end_parser();
00140 }
00141
00142 bool CStreamingVwFeatures::get_next_example()
00143 {
00144 bool ret_value;
00145 ret_value = (bool) parser.get_next_example(current_example,
00146 current_length,
00147 current_label);
00148 if (current_length < 1)
00149 return false;
00150
00151 if (ret_value)
00152 setup_example(current_example);
00153 else
00154 return false;
00155
00156 current_label = current_example->ld->label;
00157 current_length = current_example->num_features;
00158
00159 return ret_value;
00160 }
00161
00162 VwExample* CStreamingVwFeatures::get_example()
00163 {
00164 return current_example;
00165 }
00166
00167 float64_t CStreamingVwFeatures::get_label()
00168 {
00169 ASSERT(has_labels);
00170
00171 return current_label;
00172 }
00173
00174 void CStreamingVwFeatures::release_example()
00175 {
00176 env->example_number++;
00177 env->weighted_examples += current_example->ld->weight;
00178
00179 if (current_example->ld->label == FLT_MAX)
00180 env->weighted_labels += 0;
00181 else
00182 env->weighted_labels += current_example->ld->label * current_example->ld->weight;
00183
00184 env->total_features += current_example->num_features;
00185 env->sum_loss += current_example->loss;
00186
00187 current_example->reset_members();
00188 parser.finalize_example();
00189 }
00190
00191 int32_t CStreamingVwFeatures::get_dim_feature_space() const
00192 {
00193 return current_length;
00194 }
00195
00196 float32_t CStreamingVwFeatures::dot(CStreamingDotFeatures* df)
00197 {
00198 SG_NOTIMPLEMENTED;
00199 return CMath::INFTY;
00200 }
00201
00202 float32_t CStreamingVwFeatures::dense_dot(VwExample* &ex, const float32_t* vec2)
00203 {
00204 float32_t ret = 0.;
00205 for (vw_size_t* i = ex->indices.begin; i!= ex->indices.end; i++)
00206 {
00207 for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
00208 ret += vec2[f->weight_index & env->thread_mask] * f->x;
00209 }
00210 return ret;
00211 }
00212
00213 float32_t CStreamingVwFeatures::dense_dot(const float32_t* vec2, int32_t vec2_len)
00214 {
00215 return dense_dot(current_example, vec2);
00216 }
00217
00218 float32_t CStreamingVwFeatures::dense_dot(SGSparseVector<float32_t>* vec1, const float32_t* vec2)
00219 {
00220 float32_t ret = 0.;
00221 for (int32_t i = 0; i < vec1->num_feat_entries; i++)
00222 ret += vec1->features[i].entry * vec2[vec1->features[i].feat_index & env->mask];
00223
00224 return ret;
00225 }
00226
00227 float32_t CStreamingVwFeatures::dense_dot_truncated(const float32_t* vec2, VwExample* &ex, float32_t gravity)
00228 {
00229 float32_t ret = 0.;
00230 for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
00231 {
00232 for (VwFeature* f = ex->atomics[*i].begin; f!= ex->atomics[*i].end; f++)
00233 {
00234 float32_t w = vec2[f->weight_index & env->thread_mask];
00235 float32_t wprime = real_weight(w,gravity);
00236 ret += wprime*f->x;
00237 }
00238 }
00239
00240 return ret;
00241 }
00242
00243 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, VwExample* &ex, float32_t* vec2, int32_t vec2_len, bool abs_val)
00244 {
00245 if (abs_val)
00246 {
00247 for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
00248 {
00249 for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
00250 vec2[f->weight_index & env->thread_mask] += alpha * abs(f->x);
00251 }
00252 }
00253 else
00254 {
00255 for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
00256 {
00257 for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
00258 vec2[f->weight_index & env->thread_mask] += alpha * f->x;
00259 }
00260 }
00261 }
00262
00263 void CStreamingVwFeatures::add_to_dense_vec(float32_t alpha, float32_t* vec2, int32_t vec2_len, bool abs_val)
00264 {
00265 add_to_dense_vec(alpha, current_example, vec2, vec2_len, abs_val);
00266 }
00267
00268 int32_t CStreamingVwFeatures::get_num_features()
00269 {
00270 return current_length;
00271 }
00272
00273 EFeatureClass CStreamingVwFeatures::get_feature_class()
00274 {
00275 return C_STREAMING_VW;
00276 }