00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <shogun/classifier/svm/SVMSGD.h>
00024 #include <shogun/base/Parameter.h>
00025 #include <shogun/lib/Signal.h>
00026 #include <shogun/loss/HingeLoss.h>
00027
00028 using namespace shogun;
00029
// Default constructor: sets all hyperparameters to their defaults via init().
CSVMSGD::CSVMSGD()
: CLinearMachine()
{
	init();
}
00035
00036 CSVMSGD::CSVMSGD(float64_t C)
00037 : CLinearMachine()
00038 {
00039 init();
00040
00041 C1=C;
00042 C2=C;
00043 }
00044
00045 CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00046 : CLinearMachine()
00047 {
00048 init();
00049 C1=C;
00050 C2=C;
00051
00052 set_features(traindat);
00053 set_labels(trainlab);
00054 }
00055
// Destructor: drops the reference held on the loss function object.
CSVMSGD::~CSVMSGD()
{
	SG_UNREF(loss);
}
00060
00061 void CSVMSGD::set_loss_function(CLossFunction* loss_func)
00062 {
00063 if (loss)
00064 SG_UNREF(loss);
00065 loss=loss_func;
00066 SG_REF(loss);
00067 }
00068
/** Train a linear SVM by plain stochastic gradient descent
 * (Bottou-style SGD with sparse regularization updates).
 *
 * @param data optional training features; if given, must have the
 *        FP_DOT property and replaces the currently set features
 * @return true on completion (training may be interrupted via CSignal)
 */
bool CSVMSGD::train_machine(CFeatures* data)
{
	ASSERT(labels);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}

	ASSERT(features);
	// binary classification only: labels must be +1/-1
	ASSERT(labels->is_two_class_labeling());

	int32_t num_train_labels=labels->get_num_labels();
	w_dim=features->get_dim_feature_space();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	// (re)allocate and zero the weight vector; bias starts at 0
	SG_FREE(w);
	w=SG_MALLOC(float64_t, w_dim);
	memset(w, 0, w_dim*sizeof(float64_t));
	bias=0;

	// L2 regularization strength derived from the SVM cost constant
	float64_t lambda= 1.0/(C1*num_vec);

	// Heuristic initial learning rate: maxw bounds ||w||, typw is a
	// typical weight scale, eta0 is chosen from the loss derivative there.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	// start the iteration counter so that the first step size equals eta0
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

	// estimate sparsity (-> skip) and bias scale from the data
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	CSignal::clear_cancel();

	// log losses have nonzero gradient everywhere, so every example
	// triggers an update (not only margin violators with z < 1)
	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		for (int32_t i=0; i<num_vec; i++)
		{
			// 1/(lambda*t) learning-rate schedule
			float64_t eta = 1.0 / (lambda * t);
			float64_t y = labels->get_label(i);
			float64_t z = y * (features->dense_dot(i, w, w_dim) + bias);

			if (z < 1 || is_log_loss)
			{
				// gradient step on the loss term only
				float64_t etd = -eta * loss->first_derivative(z,1);
				features->add_to_dense_vec(etd * y / wscale, i, w, w_dim);

				if (use_bias)
				{
					if (use_regularized_bias)
						bias *= 1 - eta * lambda * bscale;
					bias += etd * y * bscale;
				}
			}

			// Apply the regularization shrink only every 'skip' examples
			// (amortized, keeps per-example cost proportional to sparsity).
			if (--count <= 0)
			{
				float64_t r = 1 - eta * lambda * skip;
				if (r < 0.8)
					// more accurate compounded shrink when the linearized
					// factor would be too aggressive
					r = pow(1 - eta * lambda, skip);
				CMath::scale_vector(r, w, w_dim);
				count = skip;
			}
			t++;
		}
	}

	float64_t wnorm = CMath::dot(w,w, w_dim);
	SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);

	return true;
}
00159
/** Estimate data-dependent training constants from (a subset of) the data:
 * 'skip' (how many examples between amortized regularization updates,
 * based on feature sparsity) and 'bscale' (bias update scale).
 */
void CSVMSGD::calibrate()
{
	ASSERT(features);
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0);
	ASSERT(c_dim>0);

	// per-dimension counts of nonzero occurrences across scanned vectors
	float64_t* c=SG_MALLOC(float64_t, c_dim);
	memset(c, 0, c_dim*sizeof(float64_t));

	SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim);

	int32_t n = 0;      // number of vectors scanned
	float64_t m = 0;    // largest accumulated per-dimension count so far
	float64_t r = 0;    // total number of nonzero features seen

	// scan vectors until the densest dimension has been seen >1000 times
	// (or the data runs out) -- enough to estimate sparsity
	for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
	{
		r += features->get_nnz_features_for_vector(j);
		// abs_val=true: accumulate counts regardless of feature sign
		features->add_to_dense_vec(1, j, c, c_dim, true);
		m=CMath::max(c, c_dim);
	}

	// bias scale: half the average peak count per scanned vector
	bscale = 0.5*m/n;

	// skip ~ 16 / (average fraction of nonzero features per vector)
	skip = (int32_t) ((16 * n * c_dim) / r);
	SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);

	SG_FREE(c);
}
00198
/** Initialize members to their defaults, create the default (hinge) loss,
 * and register parameters for serialization.
 * NOTE(review): registration order of m_parameters->add calls is kept
 * as-is in case serialized models depend on it.
 */
void CSVMSGD::init()
{
	t=1;
	C1=1;              // cost constant, class 1
	C2=1;              // cost constant, class 2
	wscale=1;          // weight-vector scale factor
	bscale=1;          // bias update scale (recomputed by calibrate())
	epochs=5;          // passes over the training data
	skip=1000;         // examples between regularization updates (recomputed by calibrate())
	count=1000;        // countdown to the next regularization update
	use_bias=true;

	use_regularized_bias=false;

	// default loss: hinge (standard SVM); hold a reference to it
	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&wscale, "wscale", "W scale");
	m_parameters->add(&bscale, "bscale", "b scale");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
	m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
	m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}