SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
VowpalWabbit.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2009 Yahoo! Inc. All rights reserved. The copyrights
3  * embodied in the content of this file are licensed under the BSD
4  * (revised) open source license.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * Written (W) 2011 Shashwat Lal Das
12  * Adaptation of Vowpal Wabbit v5.1.
13  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
14  */
15 
#include <shogun/classifier/vw/VowpalWabbit.h>

#include <algorithm>
#include <fcntl.h>
19 
20 using namespace std;
21 using namespace shogun;
22 
/** Default constructor.
 *
 * Starts with no regressor or learner; init(NULL) creates a fresh
 * CVwEnvironment and CVwRegressor (see init()).
 */
CVowpalWabbit::CVowpalWabbit()
{
	reg=NULL;
	learner=NULL;
	init(NULL);
}
30 
33 {
34  reg=NULL;
35  learner=NULL;
36  init(feat);
37 }
38 
41 {
42  features = vw->features;
43  env = vw->env;
44  reg = new CVwRegressor(env);
45  SG_REF(env);
46  SG_REF(reg);
47 
48  quiet = vw->quiet;
49  no_training = vw->no_training;
50  dump_interval = vw->dump_interval;
51  sum_loss_since_last_dump = 0.;
52  reg_name = vw->reg_name;
53  reg_dump_text = vw->reg_dump_text;
54  save_predictions = vw->save_predictions;
55  prediction_fd = vw->prediction_fd;
56 
57  w = reg->weight_vectors[0];
58  reg->weight_vectors[0] = NULL;
59  copy(vw->w, vw->w+vw->w_dim, w);
60  w_dim = vw->w_dim;
61  bias = vw->bias;
62 }
63 
65 {
66  SG_UNREF(env);
67  SG_UNREF(reg);
69 }
70 
72 {
73  if (reg->weight_vectors)
74  {
75  if (reg->weight_vectors[0])
76  SG_FREE(reg->weight_vectors[0]);
77  SG_FREE(reg->weight_vectors);
78  }
79 
80  reg->init(env);
81  w = reg->weight_vectors[0];
82  reg->weight_vectors[0] = NULL;
83 }
84 
85 void CVowpalWabbit::set_adaptive(bool adaptive_learning)
86 {
87  if (adaptive_learning)
88  {
89  env->adaptive = true;
90  env->set_stride(2);
91  env->power_t = 0.;
93  }
94  else
95  env->adaptive = false;
96 }
97 
98 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive)
99 {
100  if (exact_adaptive)
101  {
102  set_adaptive(true);
103  env->exact_adaptive_norm = true;
104  }
105  else
106  env->exact_adaptive_norm = false;
107 }
108 
/** Load a previously dumped regressor (weight vector) from a file.
 *
 * After loading, this machine takes over the regressor's first weight
 * vector as w (clearing reg's slot so it is not freed twice) and
 * resizes w_dim to match the environment's feature-hash size.
 *
 * @param file_name file to load the regressor from
 */
void CVowpalWabbit::load_regressor(char* file_name)
{
	reg->load_regressor(file_name);
	w = reg->weight_vectors[0];
	reg->weight_vectors[0] = NULL;
	w_dim = 1 << env->num_bits;	// weight dimension = 2^num_bits
}
116 
/** Set the file the trained regressor is dumped to after training
 * (see the end of train_machine()).
 *
 * @param file_name output file name (NULL means no dump)
 * @param is_text dump as text (true) or in binary form (false)
 */
void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text)
{
	reg_name = file_name;
	reg_dump_text = is_text;
}
122 
124 {
125  save_predictions = true;
126  prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
127  if (prediction_fd < 0)
128  SG_SERROR("Unable to open prediction file %s for writing!\n", file_name)
129 }
130 
132 {
133  env->pairs.push_back(pair);
134 }
135 
137 {
138  ASSERT(features || feat)
139  if (feat && (features != (CStreamingVwFeatures*) feat))
140  {
142  init((CStreamingVwFeatures*) feat);
143  }
144 
145  set_learner();
146 
147  VwExample* example = NULL;
148  vw_size_t current_pass = 0;
149 
150  const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n";
151 
152  if (!quiet)
153  {
154  SG_SPRINT(header_fmt,
155  "average", "since", "example", "example",
156  "current", "current", "current");
157  SG_SPRINT(header_fmt,
158  "loss", "last", "counter", "weight", "label", "predict", "features");
159  }
160 
162  while (env->passes_complete < env->num_passes)
163  {
164  while (features->get_next_example())
165  {
166  example = features->get_example();
167 
168  // Check if we shouldn't train (generally used for cache creation)
169  if (!no_training)
170  {
171  if (example->pass != current_pass)
172  {
173  env->eta *= env->eta_decay_rate;
174  current_pass = example->pass;
175  }
176 
177  predict_and_finalize(example);
178 
179  learner->train(example, example->eta_round);
180  example->eta_round = 0.;
181 
182  output_example(example);
183  }
184 
186  }
187  env->passes_complete++;
190  }
191  features->end_parser();
192 
193  if (env->l1_regularization > 0.)
194  {
195  uint32_t length = 1 << env->num_bits;
196  vw_size_t stride = env->stride;
198  for (uint32_t i = 0; i < length; i++)
199  reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity);
200  }
201 
202  if (reg_name != NULL)
203  reg->dump_regressor(reg_name, reg_dump_text);
204 
205  return true;
206 }
207 
209 {
210  float32_t prediction;
211  if (env->l1_regularization != 0.)
212  prediction = inline_l1_predict(ex);
213  else
214  prediction = inline_predict(ex);
215 
216  ex->final_prediction = 0;
217  ex->final_prediction += prediction;
218  ex->final_prediction = finalize_prediction(ex->final_prediction);
219  float32_t t = ex->example_t;
220 
221  if (ex->ld->label != FLT_MAX)
222  {
223  ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight;
224  float64_t update = 0.;
226  {
227  float32_t sum_abs_x = 0.;
228  float32_t exact_norm = compute_exact_norm(ex, sum_abs_x);
229  update = (env->eta * exact_norm)/sum_abs_x;
230  env->update_sum += update;
231  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm);
232  }
233  else
234  {
235  update = (env->eta)/pow(t, env->power_t) * ex->ld->weight;
236  ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq);
237  }
238  env->update_sum += update;
239  }
240 
241  return prediction;
242 }
243 
/** Common initialization used by the constructors.
 *
 * If a feature source is given, its environment is adopted (presumably
 * get_env() returns an already-referenced object, since no SG_REF is
 * taken here — TODO confirm against CStreamingVwFeatures); otherwise a
 * fresh environment is created and referenced. A new regressor is
 * always created.
 *
 * @param feat streaming VW features, or NULL
 */
void CVowpalWabbit::init(CStreamingVwFeatures* feat)
{
	features = feat;

	if (feat)
		env = feat->get_env();
	else
	{
		env=new CVwEnvironment();
		SG_REF(env);
	}

	reg = new CVwRegressor(env);
	SG_REF(reg);

	// Training defaults
	quiet = true;
	no_training = false;
	dump_interval = exp(1.);	// first progress line after e weighted examples
	sum_loss_since_last_dump = 0.;
	reg_name = NULL;
	reg_dump_text = true;
	save_predictions = false;
	prediction_fd = -1;	// no prediction file open

	// Take over the regressor's first weight vector as w; clearing
	// reg's slot avoids a double free.
	w = reg->weight_vectors[0];
	reg->weight_vectors[0] = NULL;
	w_dim = 1 << env->num_bits;	// weight dimension = 2^num_bits
	bias = 0.;
}
273 
275 {
276  if (env->adaptive)
278  else
280  SG_REF(learner);
281 }
282 
283 float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)
284 {
285  vw_size_t thread_num = 0;
286 
287  float32_t prediction = ex->ld->get_initial();
288 
289  float32_t* weights = reg->weight_vectors[thread_num];
290  vw_size_t thread_mask = env->thread_mask;
291 
292  prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum);
293 
294  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
295  {
296  char* i = env->pairs.get_element(k);
297 
298  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
299  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
300  temp.end = ex->atomics[(int32_t)(i[0])].end;
301  for (; temp.begin != temp.end; temp.begin++)
302  prediction += one_pf_quad_predict_trunc(weights, *temp.begin,
303  ex->atomics[(int32_t)(i[1])], thread_mask,
305  }
306 
307  return prediction;
308 }
309 
/** Predict the raw label of an example: linear part plus all quadratic
 * (crossed-namespace) contributions, without any L1 truncation.
 *
 * @param ex example
 * @return raw (un-finalized) prediction
 */
float32_t CVowpalWabbit::inline_predict(VwExample* &ex)
{
	vw_size_t thread_num = 0;
	float32_t prediction = ex->ld->initial;

	float32_t* weights = reg->weight_vectors[thread_num];
	vw_size_t thread_mask = env->thread_mask;
	// NOTE(review): passes (weights, 0) — presumably the
	// dense_dot(const float32_t*, int32_t) overload acting on the
	// current example; confirm against CStreamingVwFeatures.
	prediction += features->dense_dot(weights, 0);

	// Add each registered quadratic namespace pair
	for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
	{
		char* i = env->pairs.get_element(k);	// two namespace characters

		v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
		temp.begin = ex->atomics[(int32_t)(i[0])].begin;
		temp.end = ex->atomics[(int32_t)(i[0])].end;
		for (; temp.begin != temp.end; temp.begin++)
			prediction += one_pf_quad_predict(weights, *temp.begin,
							  ex->atomics[(int32_t)(i[1])],
							  thread_mask);
	}

	return prediction;
}
334 
335 float32_t CVowpalWabbit::finalize_prediction(float32_t ret)
336 {
337  if (isnan(ret))
338  return 0.5;
339  if (ret > env->max_label)
340  return env->max_label;
341  if (ret < env->min_label)
342  return env->min_label;
343 
344  return ret;
345 }
346 
/** Bookkeeping after an example is processed: accumulate loss, print a
 * progress line at (doubling) intervals, and optionally append the
 * prediction to the prediction file.
 *
 * @param example example just trained on
 */
void CVowpalWabbit::output_example(VwExample* &example)
{
	if (!quiet)
	{
		sum_loss_since_last_dump += example->loss;
		if (env->weighted_examples + example->ld->weight > dump_interval)
		{
			print_update(example);
			dump_interval *= 2;	// progress lines get exponentially sparser
		}
	}

	if (save_predictions)
	{
		float32_t wt = 0.;
		if (reg->weight_vectors)
			wt = reg->weight_vectors[0][0];	// first weight (index 0)

		output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag);
	}
}
368 
/** Print one progress line (average loss, loss since the last dump,
 * example counter and weight, label, prediction, feature count), then
 * reset the since-last-dump accumulators.
 *
 * @param ex current example
 */
void CVowpalWabbit::print_update(VwExample* &ex)
{
	SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f %8.4f %8.4f %8lu\n",
		  (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight),
		  sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples),
		  env->example_number + 1,
		  env->weighted_examples + ex->ld->weight,
		  ex->ld->label,
		  ex->final_prediction,
		  (long unsigned int)ex->num_features);
	sum_loss_since_last_dump = 0.0;
	old_weighted_examples = env->weighted_examples + ex->ld->weight;
}
382 
383 
/** Write one prediction (and, when present, the example tag separated
 * by a space) as a newline-terminated line to an open file descriptor.
 * A no-op when f is negative.
 *
 * @param f file descriptor (-1 when predictions are not being saved)
 * @param res prediction value, written with "%f"
 * @param weight currently unused in this function
 * @param tag example tag; appended when non-empty
 */
void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag)
{
	if (f >= 0)
	{
		char temp[30];
		int32_t num = sprintf(temp, "%f", res);
		ssize_t t;
		t = write(f, temp, num);
		if (t != num)
			SG_SERROR("Write error!\n")

		if (tag.begin != tag.end)
		{
			// single space between prediction and tag
			temp[0] = ' ';
			t = write(f, temp, 1);
			if (t != 1)
				SG_SERROR("Write error!\n")

			t = write(f, tag.begin, sizeof(char)*tag.index());
			if (t != (ssize_t) (sizeof(char)*tag.index()))
				SG_SERROR("Write error!\n")
		}

		temp[0] = '\n';
		t = write(f, temp, 1);
		if (t != 1)
			SG_SERROR("Write error!\n")
	}
}
413 
414 void CVowpalWabbit::set_verbose(bool verbose)
415 {
416  quiet=verbose==false;
417 }
418 
419 
421 {
422  // We must traverse the features in _precisely_ the same order as during training.
423  vw_size_t thread_mask = env->thread_mask;
424  vw_size_t thread_num = 0;
425 
427  if (g == 0) return 0.;
428 
429  float32_t xGx = 0.;
430 
431  float32_t* weights = reg->weight_vectors[thread_num];
432  for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
433  {
434  for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
435  {
436  float32_t* w_vec = &weights[f->weight_index & thread_mask];
437  float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x);
438  xGx += t * f->x;
439  sum_abs_x += fabsf(f->x);
440  }
441  }
442 
443  for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
444  {
445  char* i = env->pairs.get_element(k);
446 
447  v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
448  temp.begin = ex->atomics[(int32_t)(i[0])].begin;
449  temp.end = ex->atomics[(int32_t)(i[0])].end;
450  for (; temp.begin != temp.end; temp.begin++)
451  xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x);
452  }
453 
454  return xGx;
455 }
456 
458  vw_size_t mask, float32_t g, float32_t& sum_abs_x)
459 {
460  vw_size_t halfhash = quadratic_constant * page_feature.weight_index;
461  float32_t xGx = 0.;
462  float32_t update2 = g * page_feature.x * page_feature.x;
463  for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++)
464  {
465  float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask];
466  float32_t t = elem->x * CMath::invsqrt(w_vec[1] + update2 * elem->x * elem->x);
467  xGx += t * elem->x;
468  sum_abs_x += fabsf(elem->x);
469  }
470  return xGx;
471 }
uint32_t weight_index
Hashed index in weight vector.
Definition: vw_example.h:41
uint32_t vw_size_t
vw_size_t typedef to work across platforms
Definition: vw_constants.h:26
CVwRegressor * reg
Regressor.
Definition: VowpalWabbit.h:288
T get_element(int32_t index) const
Definition: DynArray.h:142
Class OnlineLinearMachine is a generic interface for linear machines like classifiers which work thro...
void set_adaptive(bool adaptive_learning)
float64_t weighted_examples
Weighted examples.
T * end
Pointer to last set element in the array.
Definition: v_array.h:160
virtual void load_regressor(char *file_name)
virtual void init(CVwEnvironment *env_to_use=NULL)
Definition: VwRegressor.cpp:55
void set_prediction_out(char *file_name)
T * begin
Pointer to first element of the array.
Definition: v_array.h:157
Class CVwEnvironment is the environment used by VW.
Definition: VwEnvironment.h:41
CLossFunction * loss
Loss function.
Definition: VwRegressor.h:118
void set_stride(vw_size_t new_stride)
vw_size_t num_features
Number of features.
Definition: vw_example.h:89
void(* update)(float *foo, float bar)
Definition: JLCoverTree.h:529
float64_t min_label
Smallest label seen.
Class v_array taken directly from JL's implementation.
float32_t one_pf_quad_predict_trunc(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask, float32_t gravity)
Definition: vw_math.cpp:48
int64_t example_number
Example number.
float32_t total_sum_feat_sq
Total sum of square of features.
Definition: vw_example.h:106
Definition: basetag.h:132
float32_t ** weight_vectors
Weight vectors, one array for each thread.
Definition: VwRegressor.h:116
float32_t l1_regularization
Level of L1 regularization.
vw_size_t num_bits
log_2 of the number of features
int32_t get_num_elements() const
Definition: DynArray.h:130
VwAdaptiveLearner uses an adaptive subgradient technique to update weights.
float64_t get_loss(float64_t prediction, float64_t label)
Definition: VwRegressor.h:65
const int32_t quadratic_constant
Constant used while hashing/accessing quadratic features.
Definition: vw_constants.h:29
float32_t eta
Learning rate.
float32_t real_weight(float32_t w, float32_t gravity)
Definition: vw_math.h:35
CVwEnvironment * env
Environment for VW, i.e., globals.
Definition: VowpalWabbit.h:282
float64_t max_label
Largest label seen.
#define SG_REF(x)
Definition: SGObject.h:54
float32_t loss
Loss.
Definition: vw_example.h:95
float32_t label
Label value.
Definition: vw_label.h:92
vw_size_t pass
Pass.
Definition: vw_example.h:91
float32_t compute_exact_norm_quad(float32_t *weights, VwFeature &page_feature, v_array< VwFeature > &offer_features, vw_size_t mask, float32_t g, float32_t &sum_abs_x)
void load_regressor(char *file_name)
v_array< vw_size_t > indices
Array of namespaces.
Definition: vw_example.h:84
virtual void set_learner()
float32_t update_sum
Sum of updates.
float32_t get_initial()
Definition: vw_label.h:75
bool exact_adaptive_norm
Whether exact norm is used for adaptive learning.
virtual float32_t dense_dot_truncated(const float32_t *vec2, VwExample *&ex, float32_t gravity)
static float32_t invsqrt(float32_t x)
x^-0.5, the inverse square root of x (float32_t)
Definition: Math.h:495
virtual CVwEnvironment * get_env()
#define SG_SPRINT(...)
Definition: SGIO.h:180
float32_t power_t
t power value while updating
float32_t weight
Weight of example.
Definition: vw_label.h:94
#define ASSERT(x)
Definition: SGIO.h:201
void push_back(T element)
Definition: DynArray.h:254
VwNonAdaptiveLearner uses a standard gradient descent weight update rule.
float32_t eta_decay_rate
Decay rate of eta per pass.
double float64_t
Definition: common.h:50
float32_t compute_exact_norm(VwExample *&ex, float32_t &sum_abs_x)
DynArray< char * > pairs
Pairs of features to cross for quadratic updates.
vw_size_t num_passes
Number of passes.
float32_t final_prediction
Final prediction.
Definition: vw_example.h:93
Regressor used by VW.
Definition: VwRegressor.h:37
virtual void train(VwExample *&ex, float32_t update)=0
vw_size_t stride
Number of elements in weight vector per feature.
v_array< char > tag
Tag.
Definition: vw_example.h:82
void set_exact_adaptive_norm(bool exact_adaptive)
Example class for VW.
Definition: vw_example.h:58
virtual float32_t predict_and_finalize(VwExample *ex)
float32_t example_t
t value for this example
Definition: vw_example.h:101
This class implements streaming features for use with VW.
void set_regressor_out(char *file_name, bool is_text=true)
float float32_t
Definition: common.h:49
float32_t initial
Initial approximation.
Definition: vw_label.h:96
float32_t global_weight
Global weight.
Definition: vw_example.h:99
One feature in VW.
Definition: vw_example.h:34
float32_t x
Feature value.
Definition: vw_example.h:38
#define SG_UNREF(x)
Definition: SGObject.h:55
virtual bool train_machine(CFeatures *feat=NULL)
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
CStreamingVwFeatures * features
Features.
Definition: VowpalWabbit.h:279
The class Features is the base class of all feature objects.
Definition: Features.h:68
VwLabel * ld
Label object.
Definition: vw_example.h:79
float32_t eta_round
Learning rate for this round.
Definition: vw_example.h:97
void add_quadratic_pair(char *pair)
#define SG_SERROR(...)
Definition: SGIO.h:179
Class CVowpalWabbit is the implementation of the online learning algorithm used in Vowpal Wabbit...
Definition: VowpalWabbit.h:40
vw_size_t thread_mask
Mask used by regressor for learning.
bool adaptive
Whether adaptive learning is used.
float32_t one_pf_quad_predict(float32_t *weights, VwFeature &f, v_array< VwFeature > &cross_features, vw_size_t mask)
Definition: vw_math.cpp:40
virtual float32_t dense_dot(VwExample *&ex, const float32_t *vec2)
vw_size_t passes_complete
Number of passes complete.
virtual void dump_regressor(char *reg_name, bool as_text)
Definition: VwRegressor.cpp:93
float64_t get_update(float64_t prediction, float64_t label, float64_t eta_t, float64_t norm)
Definition: VwRegressor.h:80
CVwLearner * learner
Learner to use.
Definition: VowpalWabbit.h:285
virtual float64_t get_square_grad(float64_t prediction, float64_t label)=0
float64_t sum_loss
Sum of losses.
v_array< VwFeature > atomics[256]
Array of features.
Definition: vw_example.h:86

SHOGUN Machine Learning Toolbox - Documentation