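/* Stochastic gradient descent SVM with a diagonal quasi-Newton scaling
 * (SGD-QN), after Bordes, Bottou and Gallinari, "SGD-QN: Careful
 * Quasi-Newton Stochastic Gradient Descent", JMLR 2009. */
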
#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
    init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
    set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
    SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
    // Ref the new loss before unrefing the old one, so that passing in
    // the currently set loss object does not destroy it.
    SG_REF(loss_func);
    SG_UNREF(loss);
    loss=loss_func;
}

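/* Accumulates into B a secant estimate of the inverse diagonal Hessian:
 * for each coordinate, the ratio of the weight change (W_1 - W) to the
 * corresponding change in the regularized gradient. Coordinates whose
 * weight did not move fall back to 1/lambda, the plain SGD gain. */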
void CSGDQN::compute_ratio(float64_t* W, float64_t* W_1, float64_t* B, float64_t* dst, int32_t dim, float64_t lambda, float64_t loss_val)
{
    for (int32_t i=0; i<dim; i++)
    {
        float64_t diffw=W_1[i]-W[i];
        if (diffw)
            B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
        else
            B[i]+=1/lambda;
    }
}

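/* Folds the fresh curvature estimates B into the running gains Bc as a
 * convex combination (weights c1, c2), skipping coordinates with no new
 * estimate (B[i]==0), and clips each gain to [v1, v2] to keep the
 * effective learning rates bounded. */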
void CSGDQN::combine_and_clip(float64_t* Bc, float64_t* B, int32_t dim, float64_t c1, float64_t c2, float64_t v1, float64_t v2)
{
    for (int32_t i=0; i<dim; i++)
    {
        if (B[i])
        {
            Bc[i]=Bc[i]*c1+B[i]*c2;
            Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
        }
    }
}

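/* Trains the linear SVM with SGD-QN: SGD steps scaled per dimension by
 * the gains Bc, a delayed regularization step every `skip` iterations,
 * and a gain update from a secant curvature estimate in the iteration
 * that follows each regularization step. */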
bool CSGDQN::train(CFeatures* data)
{
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*) data);
    }

    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    w_dim=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    ASSERT(num_vec==num_train_labels);
    ASSERT(num_vec>0);

    SG_FREE(w);
    w=SG_MALLOC(float64_t, w_dim);
    memset(w, 0, w_dim*sizeof(float64_t));

    float64_t lambda=1.0/(C1*num_vec);

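    // Shift t so the schedule starts at a reasonable learning rate:
    // eta0 is derived from the typical weight scale 1/sqrt(lambda), and
    // with Bc initialized to 1/lambda the first effective step size
    // Bc[i]/t equals eta0. This assumes |x| is roughly 1.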
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

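    // Per-dimension gains: Bc holds the current diagonal scaling
    // (initialized to 1/lambda, the gain of plain SGD), B collects fresh
    // curvature estimates, result is scratch space and w_1 stores the
    // previous weight vector.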
    float64_t* Bc=SG_MALLOC(float64_t, w_dim);
    CMath::fill_vector(Bc, w_dim, 1/lambda);

    float64_t* result=SG_MALLOC(float64_t, w_dim);
    float64_t* B=SG_MALLOC(float64_t, w_dim);
    float64_t* w_1=SG_MALLOC(float64_t, w_dim);

    // compute_ratio() accumulates into B, so it must start zeroed.
    memset(B, 0, w_dim*sizeof(float64_t));

    calibrate();

    SG_INFO("Training on %d vectors\n", num_vec);
    CSignal::clear_cancel();

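    // Log-type losses have a nonzero derivative everywhere, so every
    // example triggers an update; margin losses only update when z < 1.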
    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

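    // Main loop: every `skip` examples a delayed regularization step is
    // applied, and the iteration after it refines the gains from the
    // observed gradient change (see compute_ratio/combine_and_clip).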
    for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        count = skip;
        bool updateB=false;
        for (int32_t i=0; i<num_vec; i++)
        {
            SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
            ASSERT(w_dim==v.vlen);
            float64_t eta = 1.0/t;
            float64_t y = labels->get_label(i);
            float64_t z = y * features->dense_dot(i, w, w_dim);
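            // Iteration right after a regularization step: take the scaled
            // gradient step and use the induced gradient change to update
            // the gains.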
            if (updateB==true)
            {
                if (z < 1 || is_log_loss)
                {
                    // Copy the previous w; assigning the pointer (w_1=w)
                    // would alias the two buffers, break the ratio below
                    // and double-free w at cleanup.
                    memcpy(w_1, w, w_dim*sizeof(float64_t));
                    float64_t loss_1=-loss->first_derivative(z,1);
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,eta*loss_1*y,result,1.0,w,w_dim);
                    float64_t z2 = y * features->dense_dot(i, w, w_dim);
                    float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
                    if (diffloss)
                    {
                        compute_ratio(w,w_1,B,v.vector,w_dim,lambda,y*diffloss);
                        if (t>skip)
                            combine_and_clip(Bc,B,w_dim,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
                        else
                            combine_and_clip(Bc,B,w_dim,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
                    }
                }
                updateB=false;
            }
            else
            {
                if (--count<=0)
                {
                    CMath::vector_multiply(result,Bc,w,w_dim);
                    CMath::add(w,-skip*lambda*eta,result,1.0,w,w_dim);
                    count = skip;
                    updateB=true;
                }

                if (z < 1 || is_log_loss)
                {
                    CMath::vector_multiply(result,Bc,v.vector,w_dim);
                    CMath::add(w,-eta*loss->first_derivative(z,1)*y,result,1.0,w,w_dim);
                }
            }
            t++;

            v.free_vector();
        }
    }
    SG_FREE(result);
    SG_FREE(w_1);
    SG_FREE(B);
    SG_FREE(Bc);

    return true;
}

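/* Estimates the average number of nonzero features per vector and sets
 * `skip`, the number of iterations between delayed regularization steps,
 * inversely proportional to the data density. */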
void CSGDQN::calibrate()
{
    ASSERT(features);
    int32_t num_vec=features->get_num_vectors();
    int32_t c_dim=features->get_dim_feature_space();

    ASSERT(num_vec>0);
    ASSERT(c_dim>0);

    SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

    int32_t n = 0;
    float64_t r = 0;

    for (int32_t j=0; j<num_vec; j++, n++)
        r += features->get_nnz_features_for_vector(j);

    ASSERT(r>0);

    // Compute in floating point: 16*n*c_dim can overflow int32 on large
    // problems.
    skip = (int32_t) ((16.0 * n * c_dim) / r);
}

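/* Sets defaults (C=1, 5 epochs, hinge loss) and registers the model
 * parameters for serialization. */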
void CSGDQN::init()
{
    t=0;
    C1=1;
    C2=1;
    epochs=5;
    skip=1000;
    count=1000;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&epochs, "epochs", "Number of training epochs.");
    m_parameters->add(&skip, "skip", "Number of iterations between regularization steps.");
    m_parameters->add(&count, "count", "Iterations left until the next regularization step.");
}