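/* Quasi-Newton stochastic gradient descent (SGD-QN) for linear SVMs,
 * after Bordes, Bottou and Gallinari, "SGD-QN: Careful Quasi-Newton
 * Stochastic Gradient Descent", JMLR 10 (2009). The algorithm runs
 * plain SGD but premultiplies each gradient by a diagonal scaling
 * matrix Bc that approximates the inverse Hessian.
 */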
#include <shogun/classifier/svm/SGDQN.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/mathematics/Math.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CSGDQN::CSGDQN()
: CLinearMachine()
{
	init();
}

CSGDQN::CSGDQN(float64_t C)
: CLinearMachine()
{
	init();

	C1=C;
	C2=C;
}

CSGDQN::CSGDQN(float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
	init();
	C1=C;
	C2=C;

	set_features(traindat);
	set_labels(trainlab);
}

CSGDQN::~CSGDQN()
{
	SG_UNREF(loss);
}

void CSGDQN::set_loss_function(CLossFunction* loss_func)
{
	// ref the new loss before unref'ing the old one, so that passing
	// the currently set loss does not destroy it
	SG_REF(loss_func);
	SG_UNREF(loss);
	loss=loss_func;
}

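/* Accumulates into B the per-coordinate secant ratio. With W the
 * updated weights, W_1 the previous weights, dst the feature vector
 * and loss_val = y*(change in -loss'), as passed by train(), this is
 *
 *     B[i] += delta_w[i] / delta_g[i]
 *
 * for the regularized objective lambda/2*|w|^2 + loss(y*w.x), i.e. a
 * diagonal estimate of the inverse Hessian. When a coordinate did not
 * move, the curvature of the regularizer alone gives 1/lambda.
 */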
void CSGDQN::compute_ratio(float64_t* W,float64_t* W_1,float64_t* B,float64_t* dst,int32_t dim,float64_t lambda,float64_t loss_val)
{
	for (int32_t i=0; i<dim; i++)
	{
		float64_t diffw=W_1[i]-W[i];
		if (diffw)
			B[i]+=diffw/(lambda*diffw+loss_val*dst[i]);
		else
			B[i]+=1/lambda;
	}
}

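/* Folds the fresh estimate B into the running scaling matrix Bc as a
 * weighted average, Bc = c1*Bc + c2*B, and clips each entry to
 * [v1,v2]. The callers pass v1=1/(100*lambda) and v2=100/lambda, so
 * the per-coordinate learning rates stay within a factor of 100 of
 * 1/lambda.
 */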
void CSGDQN::combine_and_clip(float64_t* Bc,float64_t* B,int32_t dim,float64_t c1,float64_t c2,float64_t v1,float64_t v2)
{
	for (int32_t i=0; i<dim; i++)
	{
		if (B[i])
		{
			Bc[i]=Bc[i]*c1+B[i]*c2;
			Bc[i]=CMath::min(CMath::max(Bc[i],v1),v2);
		}
	}
}

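/* Training loop: each example gets a scaled SGD step on the loss term,
 * w += eta*(-loss'(z))*y * (Bc o x). Every `skip` iterations the dense
 * regularization step w -= skip*lambda*eta * (Bc o w) is applied and,
 * on the following example, the scaling matrix is re-estimated from
 * the gradient difference across one step (compute_ratio followed by
 * combine_and_clip).
 */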
bool CSGDQN::train(CFeatures* data)
{
	ASSERT(m_labels);
	ASSERT(m_labels->get_label_type() == LT_BINARY);

	if (data)
	{
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n");
		set_features((CDotFeatures*) data);
	}

	ASSERT(features);

	int32_t num_train_labels=m_labels->get_num_labels();
	int32_t num_vec=features->get_num_vectors();

	ASSERT(num_vec==num_train_labels);
	ASSERT(num_vec>0);

	w=SGVector<float64_t>(features->get_dim_feature_space());
	w.zero();

	float64_t lambda=1.0/(C1*num_vec);

	// shift t so that the initial learning rate eta=1/t is reasonable;
	// this assumes feature vectors of roughly unit norm
	float64_t maxw = 1.0 / sqrt(lambda);
	float64_t typw = sqrt(maxw);
	float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
	t = 1 / (eta0 * lambda);

	SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);

	// diagonal scaling matrix Bc, initialized to 1/lambda (the inverse
	// curvature of the regularizer alone)
	float64_t* Bc=SG_MALLOC(float64_t, w.vlen);
	SGVector<float64_t>::fill_vector(Bc, w.vlen, 1/lambda);

	float64_t* result=SG_MALLOC(float64_t, w.vlen);
	float64_t* B=SG_MALLOC(float64_t, w.vlen);

	// estimate the sparsity-based regularization skip
	calibrate();

	SG_INFO("Training on %d vectors\n", num_vec);
	CSignal::clear_cancel();

	ELossType loss_type = loss->get_loss_type();
	bool is_log_loss = false;
	if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
		is_log_loss = true;

	for (int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
	{
		count = skip;
		bool updateB=false;
		for (int32_t i=0; i<num_vec; i++)
		{
			SGVector<float64_t> v = features->get_computed_dot_feature_vector(i);
			ASSERT(w.vlen==v.vlen);
			float64_t eta = 1.0/t;
			float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
			float64_t z = y * features->dense_dot(i, w.vector, w.vlen);
			if (updateB)
			{
				// re-estimate the scaling matrix from the gradient
				// difference induced by one scaled SGD step
				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t> w_1=w.clone();
					float64_t loss_1=-loss->first_derivative(z,1);
					SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
					SGVector<float64_t>::add(w.vector,eta*loss_1*y,result,1.0,w.vector,w.vlen);
					float64_t z2 = y * features->dense_dot(i, w.vector, w.vlen);
					float64_t diffloss = -loss->first_derivative(z2,1) - loss_1;
					if (diffloss)
					{
						compute_ratio(w.vector,w_1.vector,B,v.vector,w.vlen,lambda,y*diffloss);
						if (t>skip)
							combine_and_clip(Bc,B,w.vlen,(t-skip)/(t+skip),2*skip/(t+skip),1/(100*lambda),100/lambda);
						else
							combine_and_clip(Bc,B,w.vlen,t/(t+skip),skip/(t+skip),1/(100*lambda),100/lambda);
					}
				}
				updateB=false;
			}
			else
			{
				// every skip-th iteration, apply the (dense)
				// regularization step and schedule a B update
				if (--count<=0)
				{
					SGVector<float64_t>::vector_multiply(result,Bc,w.vector,w.vlen);
					SGVector<float64_t>::add(w.vector,-skip*lambda*eta,result,1.0,w.vector,w.vlen);
					count = skip;
					updateB=true;
				}

				// scaled SGD step on the loss term
				if (z < 1 || is_log_loss)
				{
					SGVector<float64_t>::vector_multiply(result,Bc,v.vector,w.vlen);
					SGVector<float64_t>::add(w.vector,-eta*loss->first_derivative(z,1)*y,result,1.0,w.vector,w.vlen);
				}
			}
			t++;
		}
	}
	SG_FREE(result);
	SG_FREE(B);
	SG_FREE(Bc);

	return true;
}

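/* Chooses `skip` from the sparsity of the data: with r/n non-zero
 * features per vector on average, skip = 16*n*c_dim/r makes the cost
 * of one dense regularization update (proportional to c_dim) roughly
 * 1/16 of the sparse loss updates performed between two of them.
 */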
void CSGDQN::calibrate()
{
	ASSERT(features);
	int32_t num_vec=features->get_num_vectors();
	int32_t c_dim=features->get_dim_feature_space();

	ASSERT(num_vec>0);
	ASSERT(c_dim>0);

	SG_INFO("Estimating sparsity num_vec=%d num_feat=%d.\n", num_vec, c_dim);

	// accumulate the total number of non-zero features over all vectors
	int32_t n = 0;
	float64_t r = 0;

	for (int32_t j=0; j<num_vec; j++, n++)
		r += features->get_nnz_features_for_vector(j);

	skip = (int32_t) ((16 * n * c_dim) / r);
}

void CSGDQN::init()
{
	t=0;
	C1=1;
	C2=1;
	epochs=5;
	skip=1000;
	count=1000;

	loss=new CHingeLoss();
	SG_REF(loss);

	m_parameters->add(&C1, "C1", "Cost constant 1.");
	m_parameters->add(&C2, "C2", "Cost constant 2.");
	m_parameters->add(&epochs, "epochs", "Number of training epochs.");
	m_parameters->add(&skip, "skip", "Iterations between regularization updates.");
	m_parameters->add(&count, "count", "Iterations left until the next regularization update.");
}
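
/* A minimal usage sketch (the data setup below is illustrative, not
 * taken from this file; it assumes the set_epochs() setter declared in
 * SGDQN.h and the usual CDenseFeatures/CBinaryLabels constructors):
 *
 *   SGMatrix<float64_t> mat(dim, n);             // fill with training data
 *   SGVector<float64_t> lab(n);                  // fill with +1/-1 labels
 *   CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(mat);
 *   CBinaryLabels* labels=new CBinaryLabels(lab);
 *
 *   CSGDQN* svm=new CSGDQN(1.0, feats, labels);  // C=1.0
 *   svm->set_epochs(10);
 *   svm->train();
 *
 *   CBinaryLabels* out=svm->apply_binary(feats);
 *   SG_UNREF(out);
 *   SG_UNREF(svm);
 */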