SVM_linear.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2007-2009 The LIBLINEAR Project.
00003  * All rights reserved.
00004  * 
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  * 
00009  * 1. Redistributions of source code must retain the above copyright
00010  * notice, this list of conditions and the following disclaimer.
00011  * 
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  * notice, this list of conditions and the following disclaimer in the
00014  * documentation and/or other materials provided with the distribution.
00015  * 
00016  * 3. Neither name of copyright holders nor the names of its contributors
00017  * may be used to endorse or promote products derived from this software
00018  * without specific prior written permission.
00019  * 
00020  * 
00021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  */
00033 #include <shogun/lib/config.h>
00034 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00035 #ifdef HAVE_LAPACK
00036 #include <math.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include <stdarg.h>
00041 
00042 #include <shogun/mathematics/Math.h>
00043 #include <shogun/classifier/svm/SVM_linear.h>
00044 #include <shogun/classifier/svm/Tron.h>
00045 
00046 using namespace shogun;
00047 
00048 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t Cp, float64_t Cn)
00049 {
00050     int i;
00051     int l=p->l;
00052     int *y=p->y;
00053 
00054     this->prob = p;
00055 
00056     z = SG_MALLOC(double, l);
00057     D = SG_MALLOC(double, l);
00058     C = SG_MALLOC(double, l);
00059 
00060     for (i=0; i<l; i++)
00061     {
00062         if (y[i] == 1)
00063             C[i] = Cp;
00064         else
00065             C[i] = Cn;
00066     }
00067 }
00068 
00069 l2r_lr_fun::~l2r_lr_fun()
00070 {
00071     SG_FREE(z);
00072     SG_FREE(D);
00073     SG_FREE(C);
00074 }
00075 
00076 
00077 double l2r_lr_fun::fun(double *w)
00078 {
00079     int i;
00080     double f=0;
00081     int *y=prob->y;
00082     int l=prob->l;
00083     int32_t n=prob->n;
00084 
00085     Xv(w, z);
00086     for(i=0;i<l;i++)
00087     {
00088         double yz = y[i]*z[i];
00089         if (yz >= 0)
00090             f += C[i]*log(1 + exp(-yz));
00091         else
00092             f += C[i]*(-yz+log(1 + exp(yz)));
00093     }
00094     f += 0.5 *CMath::dot(w,w,n);
00095 
00096     return(f);
00097 }
00098 
00099 void l2r_lr_fun::grad(double *w, double *g)
00100 {
00101     int i;
00102     int *y=prob->y;
00103     int l=prob->l;
00104     int w_size=get_nr_variable();
00105 
00106     for(i=0;i<l;i++)
00107     {
00108         z[i] = 1/(1 + exp(-y[i]*z[i]));
00109         D[i] = z[i]*(1-z[i]);
00110         z[i] = C[i]*(z[i]-1)*y[i];
00111     }
00112     XTv(z, g);
00113 
00114     for(i=0;i<w_size;i++)
00115         g[i] = w[i] + g[i];
00116 }
00117 
// Number of optimization variables (feature dimension; includes the
// bias column when prob->use_bias is set, as n already counts it).
int l2r_lr_fun::get_nr_variable()
{
    return prob->n;
}
00122 
00123 void l2r_lr_fun::Hv(double *s, double *Hs)
00124 {
00125     int i;
00126     int l=prob->l;
00127     int w_size=get_nr_variable();
00128     double *wa = SG_MALLOC(double, l);
00129 
00130     Xv(s, wa);
00131     for(i=0;i<l;i++)
00132         wa[i] = C[i]*D[i]*wa[i];
00133 
00134     XTv(wa, Hs);
00135     for(i=0;i<w_size;i++)
00136         Hs[i] = s[i] + Hs[i];
00137     SG_FREE(wa);
00138 }
00139 
00140 void l2r_lr_fun::Xv(double *v, double *res_Xv)
00141 {
00142     int32_t l=prob->l;
00143     int32_t n=prob->n;
00144     float64_t bias=0;
00145 
00146     if (prob->use_bias)
00147     {
00148         n--;
00149         bias=v[n];
00150     }
00151 
00152     prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00153 }
00154 
00155 void l2r_lr_fun::XTv(double *v, double *res_XTv)
00156 {
00157     int l=prob->l;
00158     int32_t n=prob->n;
00159 
00160     memset(res_XTv, 0, sizeof(double)*prob->n);
00161 
00162     if (prob->use_bias)
00163         n--;
00164 
00165     for (int32_t i=0;i<l;i++)
00166     {
00167         prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
00168 
00169         if (prob->use_bias)
00170             res_XTv[n]+=v[i];
00171     }
00172 }
00173 
00174 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double Cp, double Cn)
00175 {
00176     int i;
00177     int l=p->l;
00178     int *y=p->y;
00179 
00180     this->prob = p;
00181 
00182     z = SG_MALLOC(double, l);
00183     D = SG_MALLOC(double, l);
00184     C = SG_MALLOC(double, l);
00185     I = SG_MALLOC(int, l);
00186 
00187     for (i=0; i<l; i++)
00188     {
00189         if (y[i] == 1)
00190             C[i] = Cp;
00191         else
00192             C[i] = Cn;
00193     }
00194 }
00195 
00196 l2r_l2_svc_fun::~l2r_l2_svc_fun()
00197 {
00198     SG_FREE(z);
00199     SG_FREE(D);
00200     SG_FREE(C);
00201     SG_FREE(I);
00202 }
00203 
00204 double l2r_l2_svc_fun::fun(double *w)
00205 {
00206     int i;
00207     double f=0;
00208     int *y=prob->y;
00209     int l=prob->l;
00210     int w_size=get_nr_variable();
00211 
00212     Xv(w, z);
00213     for(i=0;i<l;i++)
00214     {
00215         z[i] = y[i]*z[i];
00216         double d = 1-z[i];
00217         if (d > 0)
00218             f += C[i]*d*d;
00219     }
00220     f += 0.5*CMath::dot(w, w, w_size);
00221 
00222     return(f);
00223 }
00224 
00225 void l2r_l2_svc_fun::grad(double *w, double *g)
00226 {
00227     int i;
00228     int *y=prob->y;
00229     int l=prob->l;
00230     int w_size=get_nr_variable();
00231 
00232     sizeI = 0;
00233     for (i=0;i<l;i++)
00234         if (z[i] < 1)
00235         {
00236             z[sizeI] = C[i]*y[i]*(z[i]-1);
00237             I[sizeI] = i;
00238             sizeI++;
00239         }
00240     subXTv(z, g);
00241 
00242     for(i=0;i<w_size;i++)
00243         g[i] = w[i] + 2*g[i];
00244 }
00245 
// Number of optimization variables (feature dimension; includes the
// bias column when prob->use_bias is set, as n already counts it).
int l2r_l2_svc_fun::get_nr_variable()
{
    return prob->n;
}
00250 
00251 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
00252 {
00253     int i;
00254     int l=prob->l;
00255     int w_size=get_nr_variable();
00256     double *wa = SG_MALLOC(double, l);
00257 
00258     subXv(s, wa);
00259     for(i=0;i<sizeI;i++)
00260         wa[i] = C[I[i]]*wa[i];
00261 
00262     subXTv(wa, Hs);
00263     for(i=0;i<w_size;i++)
00264         Hs[i] = s[i] + 2*Hs[i];
00265     SG_FREE(wa);
00266 }
00267 
00268 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
00269 {
00270     int32_t l=prob->l;
00271     int32_t n=prob->n;
00272     float64_t bias=0;
00273 
00274     if (prob->use_bias)
00275     {
00276         n--;
00277         bias=v[n];
00278     }
00279 
00280     prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00281 }
00282 
00283 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
00284 {
00285     int32_t n=prob->n;
00286     float64_t bias=0;
00287 
00288     if (prob->use_bias)
00289     {
00290         n--;
00291         bias=v[n];
00292     }
00293 
00294     prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
00295 
00296     /*for (int32_t i=0;i<sizeI;i++)
00297     {
00298         res_Xv[i]=prob->x->dense_dot(I[i], v, n);
00299 
00300         if (prob->use_bias)
00301             res_Xv[i]+=bias;
00302     }*/
00303 }
00304 
00305 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
00306 {
00307     int32_t n=prob->n;
00308 
00309     if (prob->use_bias)
00310         n--;
00311 
00312     memset(XTv, 0, sizeof(float64_t)*prob->n);
00313     for (int32_t i=0;i<sizeI;i++)
00314     {
00315         prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
00316 
00317         if (prob->use_bias)
00318             XTv[n]+=v[i];
00319     }
00320 }
00321 
00322 // A coordinate descent algorithm for 
00323 // multi-class support vector machines by Crammer and Singer
00324 //
00325 //  min_{\alpha}  0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i
00326 //    s.t.     \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i
00327 // 
00328 //  where e^m_i = 0 if y_i  = m,
00329 //        e^m_i = 1 if y_i != m,
00330 //  C^m_i = C if m  = y_i, 
00331 //  C^m_i = 0 if m != y_i, 
00332 //  and w_m(\alpha) = \sum_i \alpha^m_i x_i 
00333 //
00334 // Given: 
00335 // x, y, C
00336 // eps is the stopping tolerance
00337 //
00338 // solution will be put in w
00339 
00340 #define GETI(i) (prob->y[i])
00341 // To support weights for instances, use GETI(i) (i)
00342 
00343 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
00344 {
00345     this->w_size = prob->n;
00346     this->l = prob->l;
00347     this->nr_class = n_class;
00348     this->eps = epsilon;
00349     this->max_iter = max_it;
00350     this->prob = p;
00351     this->C = weighted_C;
00352     this->B = SG_MALLOC(double, nr_class);
00353     this->G = SG_MALLOC(double, nr_class);
00354 }
00355 
00356 Solver_MCSVM_CS::~Solver_MCSVM_CS()
00357 {
00358     SG_FREE(B);
00359     SG_FREE(G);
00360 }
00361 
// qsort() comparator that orders doubles in DESCENDING order.
int compare_double(const void *a, const void *b)
{
	double lhs = *(const double *) a;
	double rhs = *(const double *) b;

	if (lhs > rhs)
		return -1;
	return (lhs < rhs) ? 1 : 0;
}
00370 
// Solve the single-example sub-problem of the Crammer-Singer dual
// (liblinear Algorithm 3): given the diagonal entry A_i = x_i^T x_i,
// the true label index yi, its upper bound C_yi and the current
// active class count, compute the optimal alpha values via the
// sorted-threshold (beta) procedure and write them to alpha_new.
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
    int r;
    // work on a copy of B so the member array stays intact
    double *D=CMath::clone_vector(B, active_i);

    if(yi < active_i)
        D[yi] += A_i*C_yi;
    qsort(D, active_i, sizeof(double), compare_double); // descending order

    // find the threshold beta such that the box/sum constraints hold
    double beta = D[0] - A_i*C_yi;
    for(r=1;r<active_i && beta<r*D[r];r++)
        beta += D[r];

    beta /= r;
    // project each coordinate: clipped at C_yi for the true class, 0 otherwise
    for(r=0;r<active_i;r++)
    {
        if(r == yi)
            alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
        else
            alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
    }
    SG_FREE(D);
}
00394 
00395 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
00396 {
00397     double bound = 0;
00398     if(m == yi)
00399         bound = C[GETI(i)];
00400     if(alpha_i == bound && G[m] < minG)
00401         return true;
00402     return false;
00403 }
00404 
00405 void Solver_MCSVM_CS::Solve(double *w)
00406 {
00407     int i, m, s;
00408     int iter = 0;
00409     double *alpha =  SG_MALLOC(double, l*nr_class);
00410     double *alpha_new = SG_MALLOC(double, nr_class);
00411     int *index = SG_MALLOC(int, l);
00412     double *QD = SG_MALLOC(double, l);
00413     int *d_ind = SG_MALLOC(int, nr_class);
00414     double *d_val = SG_MALLOC(double, nr_class);
00415     int *alpha_index = SG_MALLOC(int, nr_class*l);
00416     int *y_index = SG_MALLOC(int, l);
00417     int active_size = l;
00418     int *active_size_i = SG_MALLOC(int, l);
00419     double eps_shrink = CMath::max(10.0*eps, 1.0); // stopping tolerance for shrinking
00420     bool start_from_all = true;
00421     // initial
00422     for(i=0;i<l*nr_class;i++)
00423         alpha[i] = 0;
00424     for(i=0;i<w_size*nr_class;i++)
00425         w[i] = 0; 
00426     for(i=0;i<l;i++)
00427     {
00428         for(m=0;m<nr_class;m++)
00429             alpha_index[i*nr_class+m] = m;
00430 
00431         QD[i] = prob->x->dot(i, prob->x,i);
00432 
00433         active_size_i[i] = nr_class;
00434         y_index[i] = prob->y[i];
00435         index[i] = i;
00436     }
00437 
00438     while(iter < max_iter) 
00439     {
00440         double stopping = -CMath::INFTY;
00441         for(i=0;i<active_size;i++)
00442         {
00443             int j = i+rand()%(active_size-i);
00444             CMath::swap(index[i], index[j]);
00445         }
00446         for(s=0;s<active_size;s++)
00447         {
00448             i = index[s];
00449             double Ai = QD[i];
00450             double *alpha_i = &alpha[i*nr_class];
00451             int *alpha_index_i = &alpha_index[i*nr_class];
00452 
00453             if(Ai > 0)
00454             {
00455                 for(m=0;m<active_size_i[i];m++)
00456                     G[m] = 1;
00457                 if(y_index[i] < active_size_i[i])
00458                     G[y_index[i]] = 0;
00459 
00460                 SG_SNOTIMPLEMENTED;
00461                 /* FIXME
00462                 feature_node *xi = prob->x[i];
00463                 while(xi->index!= -1)
00464                 {
00465                     double *w_i = &w[(xi->index-1)*nr_class];
00466                     for(m=0;m<active_size_i[i];m++)
00467                         G[m] += w_i[alpha_index_i[m]]*(xi->value);
00468                     xi++;
00469                 }
00470                 */
00471 
00472                 double minG = CMath::INFTY;
00473                 double maxG = -CMath::INFTY;
00474                 for(m=0;m<active_size_i[i];m++)
00475                 {
00476                     if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
00477                         minG = G[m];
00478                     if(G[m] > maxG)
00479                         maxG = G[m];
00480                 }
00481                 if(y_index[i] < active_size_i[i])
00482                     if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
00483                         minG = G[y_index[i]];
00484 
00485                 for(m=0;m<active_size_i[i];m++)
00486                 {
00487                     if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
00488                     {
00489                         active_size_i[i]--;
00490                         while(active_size_i[i]>m)
00491                         {
00492                             if(!be_shrunk(i, active_size_i[i], y_index[i], 
00493                                             alpha_i[alpha_index_i[active_size_i[i]]], minG))
00494                             {
00495                                 CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
00496                                 CMath::swap(G[m], G[active_size_i[i]]);
00497                                 if(y_index[i] == active_size_i[i])
00498                                     y_index[i] = m;
00499                                 else if(y_index[i] == m) 
00500                                     y_index[i] = active_size_i[i];
00501                                 break;
00502                             }
00503                             active_size_i[i]--;
00504                         }
00505                     }
00506                 }
00507 
00508                 if(active_size_i[i] <= 1)
00509                 {
00510                     active_size--;
00511                     CMath::swap(index[s], index[active_size]);
00512                     s--;    
00513                     continue;
00514                 }
00515 
00516                 if(maxG-minG <= 1e-12)
00517                     continue;
00518                 else
00519                     stopping = CMath::CMath::max(maxG - minG, stopping);
00520 
00521                 for(m=0;m<active_size_i[i];m++)
00522                     B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
00523 
00524                 solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
00525                 int nz_d = 0;
00526                 for(m=0;m<active_size_i[i];m++)
00527                 {
00528                     double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
00529                     alpha_i[alpha_index_i[m]] = alpha_new[m];
00530                     if(fabs(d) >= 1e-12)
00531                     {
00532                         d_ind[nz_d] = alpha_index_i[m];
00533                         d_val[nz_d] = d;
00534                         nz_d++;
00535                     }
00536                 }
00537 
00538                 /* FIXME
00539                 xi = prob->x[i];
00540                 while(xi->index != -1)
00541                 {
00542                     double *w_i = &w[(xi->index-1)*nr_class];
00543                     for(m=0;m<nz_d;m++)
00544                         w_i[d_ind[m]] += d_val[m]*xi->value;
00545                     xi++;
00546                 }*/
00547             }
00548         }
00549 
00550         iter++;
00551         if(iter % 10 == 0)
00552         {
00553             SG_SINFO(".");
00554         }
00555 
00556         if(stopping < eps_shrink)
00557         {
00558             if(stopping < eps && start_from_all == true)
00559                 break;
00560             else
00561             {
00562                 active_size = l;
00563                 for(i=0;i<l;i++)
00564                     active_size_i[i] = nr_class;
00565                 SG_SINFO("*");
00566                 eps_shrink = CMath::max(eps_shrink/2, eps);
00567                 start_from_all = true;
00568             }
00569         }
00570         else
00571             start_from_all = false;
00572     }
00573 
00574     SG_SINFO("\noptimization finished, #iter = %d\n",iter);
00575     if (iter >= max_iter)
00576         SG_SINFO("Warning: reaching max number of iterations\n");
00577 
00578     // calculate objective value
00579     double v = 0;
00580     int nSV = 0;
00581     for(i=0;i<w_size*nr_class;i++)
00582         v += w[i]*w[i];
00583     v = 0.5*v;
00584     for(i=0;i<l*nr_class;i++)
00585     {
00586         v += alpha[i];
00587         if(fabs(alpha[i]) > 0)
00588             nSV++;
00589     }
00590     for(i=0;i<l;i++)
00591         v -= alpha[i*nr_class+prob->y[i]];
00592     SG_SINFO("Objective value = %lf\n",v);
00593     SG_SINFO("nSV = %d\n",nSV);
00594 
00595     delete [] alpha;
00596     delete [] alpha_new;
00597     delete [] index;
00598     delete [] QD;
00599     delete [] d_ind;
00600     delete [] d_val;
00601     delete [] alpha_index;
00602     delete [] y_index;
00603     delete [] active_size_i;
00604 }
00605 
00606 //
00607 // Interface functions
00608 //
00609 
// Free a trained linear model: its weight vector, label array, and
// finally the model struct itself (members first, container last).
void destroy_model(struct model *model_)
{
    SG_FREE(model_->w);
    SG_FREE(model_->label);
    SG_FREE(model_);
}
00616 
// Free the heap-allocated members of a parameter struct; the struct
// itself is owned by the caller and is not freed here.
void destroy_param(parameter* param)
{
    SG_FREE(param->weight_label);
    SG_FREE(param->weight);
}
00622 #endif //HAVE_LAPACK
00623 #endif // DOXYGEN_SHOULD_SKIP_THIS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation