00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include <shogun/classifier/svm/SVMSGD.h>
00024 #include <shogun/base/Parameter.h>
00025 #include <shogun/lib/Signal.h>
00026 #include <shogun/labels/BinaryLabels.h>
00027 #include <shogun/loss/HingeLoss.h>
00028
00029 using namespace shogun;
00030
00031 CSVMSGD::CSVMSGD()
00032 : CLinearMachine()
00033 {
00034 init();
00035 }
00036
00037 CSVMSGD::CSVMSGD(float64_t C)
00038 : CLinearMachine()
00039 {
00040 init();
00041
00042 C1=C;
00043 C2=C;
00044 }
00045
00046 CSVMSGD::CSVMSGD(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00047 : CLinearMachine()
00048 {
00049 init();
00050 C1=C;
00051 C2=C;
00052
00053 set_features(traindat);
00054 set_labels(trainlab);
00055 }
00056
CSVMSGD::~CSVMSGD()
{
	// Release the reference to the loss function taken in init() /
	// set_loss_function().
	SG_UNREF(loss);
}
00061
00062 void CSVMSGD::set_loss_function(CLossFunction* loss_func)
00063 {
00064 if (loss)
00065 SG_UNREF(loss);
00066 loss=loss_func;
00067 SG_REF(loss);
00068 }
00069
/** Train the linear SVM with plain stochastic gradient descent.
 *
 * Follows Bottou-style SGD: per-example updates with learning rate
 * eta = 1/(lambda*t), where lambda = 1/(C1*num_vec), plus a lazily
 * applied L2 weight decay every `skip` iterations.
 *
 * @param data optional training features; if given, must be CDotFeatures
 * @return true on success (always; errors abort via ASSERT/SG_ERROR)
 */
bool CSVMSGD::train_machine(CFeatures* data)
{

	ASSERT(m_labels);
	ASSERT(m_labels->get_label_type() == LT_BINARY);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}

	ASSERT(features);

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	// Start from a zero weight vector and zero bias.
	w=SGVector<float64_t>(features->get_dim_feature_space());
	w.zero();
	bias=0;

	// L2 regularization strength derived from the SVM cost constant.
	float64_t lambda= 1.0/(C1*num_vec);




	// Heuristic choice of the initial step size eta0, then shift the
	// iteration counter t so the schedule eta = 1/(lambda*t) starts at eta0.
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);



	// Estimate bscale and the weight-decay skip interval from the data.
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	CSignal::clear_cancel();

	// Log-type losses have nonzero gradient everywhere, so the update must
	// run for every example, not only margin violators (z < 1).
	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		for (int32_t i=0; i<num_vec; i++)
		{
			// Decaying learning rate and the (signed) margin of example i.
			float64_t eta = 1.0 / (lambda * t);
			float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
			float64_t z = y * (features->dense_dot(i, w.vector, w.vlen) + bias);

			if (z < 1 || is_log_loss)
			{
				// Gradient step: w += -eta * dloss(z) * y * x  (wscale==1 here).
				float64_t etd = -eta * loss->first_derivative(z,1);
				features->add_to_dense_vec(etd * y / wscale, i, w.vector, w.vlen);

				if (use_bias)
				{
					if (use_regularized_bias)
						bias *= 1 - eta * lambda * bscale;
					bias += etd * y * bscale;
				}
			}

			// Lazy L2 weight decay: instead of shrinking w every step, apply
			// the accumulated decay factor once every `skip` iterations.
			if (--count <= 0)
			{
				float64_t r = 1 - eta * lambda * skip;
				if (r < 0.8)
					// Linear approximation too coarse; use the exact product.
					r = pow(1 - eta * lambda, skip);
				SGVector<float64_t>::scale_vector(r, w.vector, w.vlen);
				count = skip;
			}
			t++;
		}
	}

	float64_t wnorm = SGVector<float64_t>::dot(w.vector,w.vector, w.vlen);
	SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);

	return true;
}
00158
00159 void CSVMSGD::calibrate()
00160 {
00161 ASSERT(features);
00162 int32_t num_vec=features->get_num_vectors();
00163 int32_t c_dim=features->get_dim_feature_space();
00164
00165 ASSERT(num_vec>0);
00166 ASSERT(c_dim>0);
00167
00168 float64_t* c=SG_MALLOC(float64_t, c_dim);
00169 memset(c, 0, c_dim*sizeof(float64_t));
00170
00171 SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim);
00172
00173
00174 int32_t n = 0;
00175 float64_t m = 0;
00176 float64_t r = 0;
00177
00178 for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
00179 {
00180 r += features->get_nnz_features_for_vector(j);
00181 features->add_to_dense_vec(1, j, c, c_dim, true);
00182
00183
00184
00185 m=SGVector<float64_t>::max(c, c_dim);
00186 }
00187
00188
00189 bscale = 0.5*m/n;
00190
00191
00192 skip = (int32_t) ((16 * n * c_dim) / r);
00193 SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);
00194
00195 SG_FREE(c);
00196 }
00197
/** Initialize members to their defaults and register serializable
 * parameters. Called from every constructor.
 */
void CSVMSGD::init()
{
	// SGD iteration counter; shifted in train_machine() to realize eta0.
	t=1;
	// Cost constants for the two classes.
	C1=1;
	C2=1;
	// Scaling factors for the weight vector and bias updates.
	wscale=1;
	bscale=1;
	// Number of passes over the data.
	epochs=5;
	// Iterations between lazy weight-decay steps (re-estimated by calibrate()).
	skip=1000;
	count=1000;
	use_bias=true;

	use_regularized_bias=false;

	// Default loss: hinge loss (standard SVM). Reference released in dtor.
	loss=new CHingeLoss();
	SG_REF(loss);

	// NOTE: registration order is part of the serialization layout — keep it.
	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&wscale, "wscale", "W scale");
	m_parameters->add(&bscale, "bscale", "b scale");
	m_parameters->add(&epochs, "epochs", "epochs");
	m_parameters->add(&skip, "skip", "skip");
	m_parameters->add(&count, "count", "count");
	m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
	m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}