VowpalWabbit.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2009 Yahoo! Inc.  All rights reserved.  The copyrights
00003  * embodied in the content of this file are licensed under the BSD
00004  * (revised) open source license.
00005  *
00006  * This program is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 3 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * Written (W) 2011 Shashwat Lal Das
00012  * Adaptation of Vowpal Wabbit v5.1.
00013  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society.
00014  */
00015 
00016 #include <shogun/classifier/vw/VowpalWabbit.h>
00017 
00018 using namespace shogun;
00019 
00020 CVowpalWabbit::CVowpalWabbit()
00021     : COnlineLinearMachine()
00022 {
00023     reg=NULL;
00024     learner=NULL;
00025     init(NULL);
00026 }
00027 
00028 CVowpalWabbit::CVowpalWabbit(CStreamingVwFeatures* feat)
00029     : COnlineLinearMachine()
00030 {
00031     reg=NULL;
00032     learner=NULL;
00033     init(feat);
00034 }
00035 
/** Destructor: release the references taken on the environment and
 * regressor (in init()) and on the learner (in set_learner()). */
CVowpalWabbit::~CVowpalWabbit()
{
    SG_UNREF(env);
    SG_UNREF(reg);
    SG_UNREF(learner);
}
00042 
/** Free the current weight vectors and rebuild them from the
 * environment (needed e.g. after the stride changes for adaptive
 * learning), then re-point the machine's weight vector w at the
 * freshly allocated storage. */
void CVowpalWabbit::reinitialize_weights()
{
    // Only slot [0] is freed here — presumably a single weight vector
    // is in use (thread_num is always 0 elsewhere in this file).
    if (reg->weight_vectors)
    {
        if (reg->weight_vectors[0])
            SG_FREE(reg->weight_vectors[0]);
        SG_FREE(reg->weight_vectors);
    }

    reg->init(env);
    w = reg->weight_vectors[0];
}
00055 
00056 void CVowpalWabbit::set_adaptive(bool adaptive_learning)
00057 {
00058     if (adaptive_learning)
00059     {
00060         env->adaptive = true;
00061         env->set_stride(2);
00062         env->power_t = 0.;
00063         reinitialize_weights();
00064     }
00065     else
00066         env->adaptive = false;
00067 }
00068 
00069 void CVowpalWabbit::set_exact_adaptive_norm(bool exact_adaptive)
00070 {
00071     if (exact_adaptive)
00072     {
00073         set_adaptive(true);
00074         env->exact_adaptive_norm = true;
00075     }
00076     else
00077         env->exact_adaptive_norm = false;
00078 }
00079 
/** Load a previously dumped regressor from file and sync the
 * machine's weight vector and dimension with the loaded model. */
void CVowpalWabbit::load_regressor(char* file_name)
{
    reg->load_regressor(file_name);
    w = reg->weight_vectors[0];
    // Hashed weight space has 2^num_bits entries
    w_dim = 1 << env->num_bits;
}
00086 
/** Remember where (and in which format) to dump the regressor; the
 * actual dump happens at the end of train_machine(). */
void CVowpalWabbit::set_regressor_out(char* file_name, bool is_text)
{
    reg_name = file_name;
    reg_dump_text = is_text;
}
00092 
/** Enable saving of per-example predictions to the given file.
 * Errors out if the file cannot be opened for writing. */
void CVowpalWabbit::set_prediction_out(char* file_name)
{
    save_predictions = true;
    // Truncate/create with rw-rw-rw- (subject to umask)
    prediction_fd = open(file_name, O_CREAT|O_TRUNC|O_WRONLY, 0666);
    if (prediction_fd < 0)
        SG_SERROR("Unable to open prediction file %s for writing!\n", file_name);
}
00100 
/** Register a namespace pair (two-character string) whose features
 * are crossed quadratically during prediction and training. */
void CVowpalWabbit::add_quadratic_pair(char* pair)
{
    env->pairs.push_back(pair);
}
00105 
/** Train on the given (or previously set) streaming VW features.
 *
 * Runs env->num_passes passes over the stream; for each example it
 * predicts and computes the update (predict_and_finalize), then lets
 * the learner apply it.  Afterwards the deferred L1 truncation is
 * applied to all weights and the regressor is dumped if requested.
 *
 * @param feat optional features to switch to before training
 * @return true on success
 */
bool CVowpalWabbit::train_machine(CFeatures* feat)
{
    ASSERT(features || feat);
    // Re-initialize if a different feature object was passed in
    if (feat && (features != (CStreamingVwFeatures*) feat))
    {
        SG_UNREF(features);
        init((CStreamingVwFeatures*) feat);
    }

    set_learner();

    VwExample* example = NULL;
    vw_size_t current_pass = 0;

    const char* header_fmt = "%-10s %-10s %8s %8s %10s %8s %8s\n";

    if (!quiet)
    {
        SG_SPRINT(header_fmt,
              "average", "since", "example", "example",
              "current", "current", "current");
        SG_SPRINT(header_fmt,
              "loss", "last", "counter", "weight", "label", "predict", "features");
    }

    features->start_parser();
    while (env->passes_complete < env->num_passes)
    {
        while (features->get_next_example())
        {
            example = features->get_example();

            // Check if we shouldn't train (generally used for cache creation)
            if (!no_training)
            {
                // Decay the learning rate at each pass boundary
                if (example->pass != current_pass)
                {
                    env->eta *= env->eta_decay_rate;
                    current_pass = example->pass;
                }

                predict_and_finalize(example);

                // eta_round was set by predict_and_finalize; consume and reset it
                learner->train(example, example->eta_round);
                example->eta_round = 0.;

                output_example(example);
            }

            features->release_example();
        }
        env->passes_complete++;
        if (env->passes_complete < env->num_passes)
            features->reset_stream();
    }
    features->end_parser();

    // L1: apply the accumulated "gravity" truncation to every weight
    // (only slot 0 of each stride group holds the actual weight)
    if (env->l1_regularization > 0.)
    {
        uint32_t length = 1 << env->num_bits;
        vw_size_t stride = env->stride;
        float32_t gravity = env->l1_regularization * env->update_sum;
        for (uint32_t i = 0; i < length; i++)
            reg->weight_vectors[0][stride*i] = real_weight(reg->weight_vectors[0][stride*i], gravity);
    }

    if (reg_name != NULL)
        reg->dump_regressor(reg_name, reg_dump_text);

    return true;
}
00177 
00178 float32_t CVowpalWabbit::predict_and_finalize(VwExample* ex)
00179 {
00180     float32_t prediction;
00181     if (env->l1_regularization != 0.)
00182         prediction = inline_l1_predict(ex);
00183     else
00184         prediction = inline_predict(ex);
00185 
00186     ex->final_prediction = 0;
00187     ex->final_prediction += prediction;
00188     ex->final_prediction = finalize_prediction(ex->final_prediction);
00189     float32_t t = ex->example_t;
00190 
00191     if (ex->ld->label != FLT_MAX)
00192     {
00193         ex->loss = reg->get_loss(ex->final_prediction, ex->ld->label) * ex->ld->weight;
00194         float64_t update = 0.;
00195         if (env->adaptive && env->exact_adaptive_norm)
00196         {
00197             float32_t sum_abs_x = 0.;
00198             float32_t exact_norm = compute_exact_norm(ex, sum_abs_x);
00199             update = (env->eta * exact_norm)/sum_abs_x;
00200             env->update_sum += update;
00201             ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, exact_norm);
00202         }
00203         else
00204         {
00205             update = (env->eta)/pow(t, env->power_t) * ex->ld->weight;
00206             ex->eta_round = reg->get_update(ex->final_prediction, ex->ld->label, update, ex->total_sum_feat_sq);
00207         }
00208         env->update_sum += update;
00209     }
00210 
00211     return prediction;
00212 }
00213 
00214 void CVowpalWabbit::init(CStreamingVwFeatures* feat)
00215 {
00216     features = feat;
00217     env = feat->get_env();
00218     reg = new CVwRegressor(env);
00219     SG_REF(env);
00220     SG_REF(reg);
00221 
00222     quiet = true;
00223     no_training = false;
00224     dump_interval = exp(1.);
00225     sum_loss_since_last_dump = 0.;
00226     reg_name = NULL;
00227     reg_dump_text = true;
00228     save_predictions = false;
00229     prediction_fd = -1;
00230 
00231     w = reg->weight_vectors[0];
00232     w_dim = 1 << env->num_bits;
00233     bias = 0.;
00234 }
00235 
00236 void CVowpalWabbit::set_learner()
00237 {
00238     if (env->adaptive)
00239         learner = new CVwAdaptiveLearner(reg, env);
00240     else
00241         learner = new CVwNonAdaptiveLearner(reg, env);
00242     SG_REF(learner);
00243 }
00244 
/** Prediction with lazy L1 truncation: every weight is shrunk toward
 * zero by gravity = l1_regularization * update_sum while computing
 * the dot product.
 *
 * @param ex example
 * @return initial value + truncated linear term + quadratic-pair terms
 */
float32_t CVowpalWabbit::inline_l1_predict(VwExample* &ex)
{
    vw_size_t thread_num = 0;

    float32_t prediction = ex->ld->get_initial();

    float32_t* weights = reg->weight_vectors[thread_num];
    vw_size_t thread_mask = env->thread_mask;

    prediction += features->dense_dot_truncated(weights, ex, env->l1_regularization * env->update_sum);

    // Add contributions of registered quadratic namespace pairs
    for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
    {
        char* i = env->pairs.get_element(k);

        // Shallow copy so advancing begin doesn't disturb ex->atomics
        v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
        temp.begin = ex->atomics[(int32_t)(i[0])].begin;
        temp.end = ex->atomics[(int32_t)(i[0])].end;
        for (; temp.begin != temp.end; temp.begin++)
            prediction += one_pf_quad_predict_trunc(weights, *temp.begin,
                                ex->atomics[(int32_t)(i[1])], thread_mask,
                                env->l1_regularization * env->update_sum);
    }

    return prediction;
}
00271 
/** Plain (non-truncated) prediction: initial value + dense dot of the
 * current example with the weights + quadratic-pair terms.
 *
 * @param ex example
 * @return prediction
 */
float32_t CVowpalWabbit::inline_predict(VwExample* &ex)
{
    vw_size_t thread_num = 0;
    float32_t prediction = ex->ld->initial;

    float32_t* weights = reg->weight_vectors[thread_num];
    vw_size_t thread_mask = env->thread_mask;
    // NOTE(review): dense_dot is called with second argument 0 rather
    // than ex — presumably it operates on the features object's current
    // example internally; confirm against CStreamingVwFeatures.
    prediction += features->dense_dot(weights, 0);

    // Add contributions of registered quadratic namespace pairs
    for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
    {
        char* i = env->pairs.get_element(k);

        // Shallow copy so advancing begin doesn't disturb ex->atomics
        v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
        temp.begin = ex->atomics[(int32_t)(i[0])].begin;
        temp.end = ex->atomics[(int32_t)(i[0])].end;
        for (; temp.begin != temp.end; temp.begin++)
            prediction += one_pf_quad_predict(weights, *temp.begin,
                              ex->atomics[(int32_t)(i[1])],
                              thread_mask);
    }

    return prediction;
}
00296 
00297 float32_t CVowpalWabbit::finalize_prediction(float32_t ret)
00298 {
00299     if (isnan(ret))
00300         return 0.5;
00301     if (ret > env->max_label)
00302         return env->max_label;
00303     if (ret < env->min_label)
00304         return env->min_label;
00305 
00306     return ret;
00307 }
00308 
/** Per-example bookkeeping after training: progress printing at
 * exponentially growing intervals, and optional dumping of the
 * prediction to the prediction file. */
void CVowpalWabbit::output_example(VwExample* &example)
{
    if (!quiet)
    {
        sum_loss_since_last_dump += example->loss;
        // Print roughly each time the weighted example count doubles
        if (env->weighted_examples + example->ld->weight > dump_interval)
        {
            print_update(example);
            dump_interval *= 2;
        }
    }

    if (save_predictions)
    {
        float32_t wt = 0.;
        if (reg->weight_vectors)
            wt = reg->weight_vectors[0][0];

        output_prediction(prediction_fd, example->final_prediction, wt * example->global_weight, example->tag);
    }
}
00330 
/** Print one progress line (average loss, loss since the last print,
 * example counter and weight, label, prediction, feature count) and
 * reset the since-last-dump accumulators. */
void CVowpalWabbit::print_update(VwExample* &ex)
{
    SG_SPRINT("%-10.6f %-10.6f %8lld %8.1f   %8.4f %8.4f %8lu\n",
          (env->sum_loss + ex->loss)/(env->weighted_examples + ex->ld->weight),
          sum_loss_since_last_dump/(env->weighted_examples + ex->ld->weight - old_weighted_examples),
          env->example_number + 1,
          env->weighted_examples + ex->ld->weight,
          ex->ld->label,
          ex->final_prediction,
          (long unsigned int)ex->num_features);
    sum_loss_since_last_dump = 0.0;
    old_weighted_examples = env->weighted_examples + ex->ld->weight;
}
00344 
00345 
00346 void CVowpalWabbit::output_prediction(int32_t f, float32_t res, float32_t weight, v_array<char> tag)
00347 {
00348     if (f >= 0)
00349     {
00350         char temp[30];
00351         int32_t num = sprintf(temp, "%f", res);
00352         ssize_t t;
00353         t = write(f, temp, num);
00354         if (t != num)
00355             SG_SERROR("Write error!\n");
00356 
00357         if (tag.begin != tag.end)
00358         {
00359             temp[0] = ' ';
00360             t = write(f, temp, 1);
00361             if (t != 1)
00362                 SG_SERROR("Write error!\n");
00363 
00364             t = write(f, tag.begin, sizeof(char)*tag.index());
00365             if (t != (ssize_t) (sizeof(char)*tag.index()))
00366                 SG_SERROR("Write error!\n");
00367         }
00368 
00369         temp[0] = '\n';
00370         t = write(f, temp, 1);
00371         if (t != 1)
00372             SG_SERROR("Write error!\n");
00373     }
00374 }
00375 
00376 void CVowpalWabbit::set_verbose(bool verbose)
00377 {
00378     quiet=verbose==false;
00379 }
00380 
00381 
/** Compute the example's norm under the adaptive metric (x'Gx, with
 * per-weight accumulated squared-gradient information), and accumulate
 * the sum of absolute feature values into sum_abs_x.
 *
 * @param ex example
 * @param sum_abs_x accumulates sum of |x| over all features (in/out)
 * @return x'Gx, or 0 if the squared gradient at the prediction is 0
 */
float32_t CVowpalWabbit::compute_exact_norm(VwExample* &ex, float32_t& sum_abs_x)
{
    // We must traverse the features in _precisely_ the same order as during training.
    vw_size_t thread_mask = env->thread_mask;
    vw_size_t thread_num = 0;

    float32_t g = reg->loss->get_square_grad(ex->final_prediction, ex->ld->label) * ex->ld->weight;
    if (g == 0) return 0.;

    float32_t xGx = 0.;

    float32_t* weights = reg->weight_vectors[thread_num];
    for (vw_size_t* i = ex->indices.begin; i != ex->indices.end; i++)
    {
        for (VwFeature* f = ex->atomics[*i].begin; f != ex->atomics[*i].end; f++)
        {
            // w_vec[1] is the slot adjacent to the weight — presumably the
            // accumulated squared gradient used by adaptive learning
            // (the stride is 2 when adaptive mode is enabled).
            float32_t* w_vec = &weights[f->weight_index & thread_mask];
            float32_t t = f->x * CMath::invsqrt(w_vec[1] + g * f->x * f->x);
            xGx += t * f->x;
            sum_abs_x += fabsf(f->x);
        }
    }

    // Include contributions of quadratic namespace pairs
    for (int32_t k = 0; k < env->pairs.get_num_elements(); k++)
    {
        char* i = env->pairs.get_element(k);

        // Shallow copy so advancing begin doesn't disturb ex->atomics
        v_array<VwFeature> temp = ex->atomics[(int32_t)(i[0])];
        temp.begin = ex->atomics[(int32_t)(i[0])].begin;
        temp.end = ex->atomics[(int32_t)(i[0])].end;
        for (; temp.begin != temp.end; temp.begin++)
            xGx += compute_exact_norm_quad(weights, *temp.begin, ex->atomics[(int32_t)(i[1])], thread_mask, g, sum_abs_x);
    }

    return xGx;
}
00418 
/** Quadratic-pair contribution to the exact adaptive norm: crosses one
 * page feature with every offer feature (hashed via quadratic_constant)
 * and accumulates the partial x'Gx and sum of |x|.
 *
 * @param weights weight vector
 * @param page_feature feature from the first namespace of the pair
 * @param offer_features features of the second namespace
 * @param mask hash mask into the weight vector
 * @param g squared-gradient scale from compute_exact_norm
 * @param sum_abs_x accumulates sum of |x| of crossed features (in/out)
 * @return partial x'Gx for this page feature
 */
float32_t CVowpalWabbit::compute_exact_norm_quad(float32_t* weights, VwFeature& page_feature, v_array<VwFeature> &offer_features,
                         vw_size_t mask, float32_t g, float32_t& sum_abs_x)
{
    vw_size_t halfhash = quadratic_constant * page_feature.weight_index;
    float32_t xGx = 0.;
    float32_t update2 = g * page_feature.x * page_feature.x;
    for (VwFeature* elem = offer_features.begin; elem != offer_features.end; elem++)
    {
        // w_vec[1]: slot next to the weight — presumably the adaptive
        // accumulated squared gradient (stride 2 in adaptive mode)
        float32_t* w_vec = &weights[(halfhash + elem->weight_index) & mask];
        float32_t t = elem->x * CMath::invsqrt(w_vec[1] + update2 * elem->x * elem->x);
        xGx += t * elem->x;
        sum_abs_x += fabsf(elem->x);
    }
    return xGx;
}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation