00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <shogun/classifier/svm/OnlineSVMSGD.h>
00024 #include <shogun/base/Parameter.h>
00025 #include <shogun/lib/Signal.h>
00026 #include <shogun/loss/HingeLoss.h>
00027
00028 using namespace shogun;
00029
// Default constructor: initializes all hyperparameters (C1, C2, lambda,
// epochs, etc.) to their defaults via init(); no features are set yet.
COnlineSVMSGD::COnlineSVMSGD()
: COnlineLinearMachine()
{
	init();
}
00035
00036 COnlineSVMSGD::COnlineSVMSGD(float64_t C)
00037 : COnlineLinearMachine()
00038 {
00039 init();
00040
00041 C1=C;
00042 C2=C;
00043 }
00044
00045 COnlineSVMSGD::COnlineSVMSGD(float64_t C, CStreamingDotFeatures* traindat)
00046 : COnlineLinearMachine()
00047 {
00048 init();
00049 C1=C;
00050 C2=C;
00051
00052 set_features(traindat);
00053 }
00054
// Destructor: releases the reference held on the loss function
// (acquired in init() / set_loss_function()).
COnlineSVMSGD::~COnlineSVMSGD()
{
	SG_UNREF(loss);
}
00059
00060 void COnlineSVMSGD::set_loss_function(CLossFunction* loss_func)
00061 {
00062 if (loss)
00063 SG_UNREF(loss);
00064 loss=loss_func;
00065 SG_REF(loss);
00066 }
00067
00068 bool COnlineSVMSGD::train(CFeatures* data)
00069 {
00070 if (data)
00071 {
00072 if (!data->has_property(FP_STREAMING_DOT))
00073 SG_ERROR("Specified features are not of type CStreamingDotFeatures\n");
00074 set_features((CStreamingDotFeatures*) data);
00075 }
00076
00077 features->start_parser();
00078
00079
00080 ASSERT(features);
00081 ASSERT(features->get_has_labels());
00082 if (w)
00083 SG_FREE(w);
00084 w_dim=1;
00085 w=new float32_t;
00086 bias=0;
00087
00088
00089
00090
00091 float64_t maxw = 1.0 / sqrt(lambda);
00092 float64_t typw = sqrt(maxw);
00093 float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
00094 t = 1 / (eta0 * lambda);
00095
00096 SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);
00097
00098
00099 calibrate();
00100 if (features->is_seekable())
00101 features->reset_stream();
00102
00103 CSignal::clear_cancel();
00104
00105 ELossType loss_type = loss->get_loss_type();
00106 bool is_log_loss = false;
00107 if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
00108 is_log_loss = true;
00109
00110 int32_t vec_count;
00111 for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
00112 {
00113 vec_count=0;
00114 count = skip;
00115 while (features->get_next_example())
00116 {
00117 vec_count++;
00118
00119 features->expand_if_required(w, w_dim);
00120
00121 float64_t eta = 1.0 / (lambda * t);
00122 float64_t y = features->get_label();
00123 float64_t z = y * (features->dense_dot(w, w_dim) + bias);
00124
00125 if (z < 1 || is_log_loss)
00126 {
00127 float64_t etd = -eta * loss->first_derivative(z,1);
00128 features->add_to_dense_vec(etd * y / wscale, w, w_dim);
00129
00130 if (use_bias)
00131 {
00132 if (use_regularized_bias)
00133 bias *= 1 - eta * lambda * bscale;
00134 bias += etd * y * bscale;
00135 }
00136 }
00137
00138 if (--count <= 0)
00139 {
00140 float32_t r = 1 - eta * lambda * skip;
00141 if (r < 0.8)
00142 r = pow(1 - eta * lambda, skip);
00143 SGVector<float32_t>::scale_vector(r, w, w_dim);
00144 count = skip;
00145 }
00146 t++;
00147
00148 features->release_example();
00149 }
00150
00151
00152
00153 if (features->is_seekable() && e < epochs-1)
00154 features->reset_stream();
00155 else
00156 break;
00157
00158 }
00159
00160 features->end_parser();
00161 float64_t wnorm = SGVector<float32_t>::dot(w,w, w_dim);
00162 SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);
00163
00164 return true;
00165 }
00166
00167 void COnlineSVMSGD::calibrate(int32_t max_vec_num)
00168 {
00169 int32_t c_dim=1;
00170 float32_t* c=new float32_t;
00171
00172
00173 int32_t n = 0;
00174 float64_t m = 0;
00175 float64_t r = 0;
00176
00177 while (features->get_next_example())
00178 {
00179
00180 features->expand_if_required(c, c_dim);
00181
00182 r += features->get_nnz_features_for_vector();
00183 features->add_to_dense_vec(1, c, c_dim, true);
00184
00185
00186
00187 m=SGVector<float32_t>::max(c, c_dim);
00188 n++;
00189
00190 features->release_example();
00191 if (n>=max_vec_num || m > 1000)
00192 break;
00193 }
00194
00195 SG_PRINT("Online SGD calibrated using %d vectors.\n", n);
00196
00197
00198 bscale = 0.5*m/n;
00199
00200
00201 skip = (int32_t) ((16 * n * c_dim) / r);
00202
00203 SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);
00204
00205 SG_FREE(c);
00206 }
00207
// Set all hyperparameters to their defaults, create the default (hinge)
// loss, and register the tunable members with the parameter framework.
void COnlineSVMSGD::init()
{
	t=1;
	C1=1;
	C2=1;
	lambda=1e-4;         // regularization strength
	wscale=1;            // scale factor applied to w updates
	bscale=1;            // scale factor applied to bias updates
	epochs=1;
	skip=1000;           // apply regularization shrink every 'skip' examples
	count=1000;          // countdown until the next shrink
	use_bias=true;

	use_regularized_bias=false;

	// Default loss is hinge; set_loss_function() can replace it.
	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&lambda, "lambda", "Regularization parameter.");
	m_parameters->add(&wscale, "wscale", "W scale");
	m_parameters->add(&bscale, "bscale", "b scale");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
	m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
	m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}