00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <shogun/lib/config.h>
00034 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00035 #ifdef HAVE_LAPACK
00036 #include <math.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <string.h>
00040 #include <stdarg.h>
00041
00042 #include <shogun/mathematics/Math.h>
00043 #include <shogun/classifier/svm/SVM_linear.h>
00044 #include <shogun/classifier/svm/Tron.h>
00045
00046 using namespace shogun;
00047
00048 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t Cp, float64_t Cn)
00049 {
00050 int i;
00051 int l=p->l;
00052 int *y=p->y;
00053
00054 this->prob = p;
00055
00056 z = SG_MALLOC(double, l);
00057 D = SG_MALLOC(double, l);
00058 C = SG_MALLOC(double, l);
00059
00060 for (i=0; i<l; i++)
00061 {
00062 if (y[i] == 1)
00063 C[i] = Cp;
00064 else
00065 C[i] = Cn;
00066 }
00067 }
00068
00069 l2r_lr_fun::~l2r_lr_fun()
00070 {
00071 SG_FREE(z);
00072 SG_FREE(D);
00073 SG_FREE(C);
00074 }
00075
00076
00077 double l2r_lr_fun::fun(double *w)
00078 {
00079 int i;
00080 double f=0;
00081 int *y=prob->y;
00082 int l=prob->l;
00083 int32_t n=prob->n;
00084
00085 Xv(w, z);
00086 for(i=0;i<l;i++)
00087 {
00088 double yz = y[i]*z[i];
00089 if (yz >= 0)
00090 f += C[i]*log(1 + exp(-yz));
00091 else
00092 f += C[i]*(-yz+log(1 + exp(yz)));
00093 }
00094 f += 0.5 *CMath::dot(w,w,n);
00095
00096 return(f);
00097 }
00098
00099 void l2r_lr_fun::grad(double *w, double *g)
00100 {
00101 int i;
00102 int *y=prob->y;
00103 int l=prob->l;
00104 int w_size=get_nr_variable();
00105
00106 for(i=0;i<l;i++)
00107 {
00108 z[i] = 1/(1 + exp(-y[i]*z[i]));
00109 D[i] = z[i]*(1-z[i]);
00110 z[i] = C[i]*(z[i]-1)*y[i];
00111 }
00112 XTv(z, g);
00113
00114 for(i=0;i<w_size;i++)
00115 g[i] = w[i] + g[i];
00116 }
00117
00118 int l2r_lr_fun::get_nr_variable()
00119 {
00120 return prob->n;
00121 }
00122
00123 void l2r_lr_fun::Hv(double *s, double *Hs)
00124 {
00125 int i;
00126 int l=prob->l;
00127 int w_size=get_nr_variable();
00128 double *wa = SG_MALLOC(double, l);
00129
00130 Xv(s, wa);
00131 for(i=0;i<l;i++)
00132 wa[i] = C[i]*D[i]*wa[i];
00133
00134 XTv(wa, Hs);
00135 for(i=0;i<w_size;i++)
00136 Hs[i] = s[i] + Hs[i];
00137 SG_FREE(wa);
00138 }
00139
00140 void l2r_lr_fun::Xv(double *v, double *res_Xv)
00141 {
00142 int32_t l=prob->l;
00143 int32_t n=prob->n;
00144 float64_t bias=0;
00145
00146 if (prob->use_bias)
00147 {
00148 n--;
00149 bias=v[n];
00150 }
00151
00152 prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00153 }
00154
00155 void l2r_lr_fun::XTv(double *v, double *res_XTv)
00156 {
00157 int l=prob->l;
00158 int32_t n=prob->n;
00159
00160 memset(res_XTv, 0, sizeof(double)*prob->n);
00161
00162 if (prob->use_bias)
00163 n--;
00164
00165 for (int32_t i=0;i<l;i++)
00166 {
00167 prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
00168
00169 if (prob->use_bias)
00170 res_XTv[n]+=v[i];
00171 }
00172 }
00173
00174 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double Cp, double Cn)
00175 {
00176 int i;
00177 int l=p->l;
00178 int *y=p->y;
00179
00180 this->prob = p;
00181
00182 z = SG_MALLOC(double, l);
00183 D = SG_MALLOC(double, l);
00184 C = SG_MALLOC(double, l);
00185 I = SG_MALLOC(int, l);
00186
00187 for (i=0; i<l; i++)
00188 {
00189 if (y[i] == 1)
00190 C[i] = Cp;
00191 else
00192 C[i] = Cn;
00193 }
00194 }
00195
00196 l2r_l2_svc_fun::~l2r_l2_svc_fun()
00197 {
00198 SG_FREE(z);
00199 SG_FREE(D);
00200 SG_FREE(C);
00201 SG_FREE(I);
00202 }
00203
00204 double l2r_l2_svc_fun::fun(double *w)
00205 {
00206 int i;
00207 double f=0;
00208 int *y=prob->y;
00209 int l=prob->l;
00210 int w_size=get_nr_variable();
00211
00212 Xv(w, z);
00213 for(i=0;i<l;i++)
00214 {
00215 z[i] = y[i]*z[i];
00216 double d = 1-z[i];
00217 if (d > 0)
00218 f += C[i]*d*d;
00219 }
00220 f += 0.5*CMath::dot(w, w, w_size);
00221
00222 return(f);
00223 }
00224
00225 void l2r_l2_svc_fun::grad(double *w, double *g)
00226 {
00227 int i;
00228 int *y=prob->y;
00229 int l=prob->l;
00230 int w_size=get_nr_variable();
00231
00232 sizeI = 0;
00233 for (i=0;i<l;i++)
00234 if (z[i] < 1)
00235 {
00236 z[sizeI] = C[i]*y[i]*(z[i]-1);
00237 I[sizeI] = i;
00238 sizeI++;
00239 }
00240 subXTv(z, g);
00241
00242 for(i=0;i<w_size;i++)
00243 g[i] = w[i] + 2*g[i];
00244 }
00245
00246 int l2r_l2_svc_fun::get_nr_variable()
00247 {
00248 return prob->n;
00249 }
00250
00251 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
00252 {
00253 int i;
00254 int l=prob->l;
00255 int w_size=get_nr_variable();
00256 double *wa = SG_MALLOC(double, l);
00257
00258 subXv(s, wa);
00259 for(i=0;i<sizeI;i++)
00260 wa[i] = C[I[i]]*wa[i];
00261
00262 subXTv(wa, Hs);
00263 for(i=0;i<w_size;i++)
00264 Hs[i] = s[i] + 2*Hs[i];
00265 SG_FREE(wa);
00266 }
00267
00268 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
00269 {
00270 int32_t l=prob->l;
00271 int32_t n=prob->n;
00272 float64_t bias=0;
00273
00274 if (prob->use_bias)
00275 {
00276 n--;
00277 bias=v[n];
00278 }
00279
00280 prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00281 }
00282
00283 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
00284 {
00285 int32_t n=prob->n;
00286 float64_t bias=0;
00287
00288 if (prob->use_bias)
00289 {
00290 n--;
00291 bias=v[n];
00292 }
00293
00294 prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
00295
00296
00297
00298
00299
00300
00301
00302
00303 }
00304
00305 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
00306 {
00307 int32_t n=prob->n;
00308
00309 if (prob->use_bias)
00310 n--;
00311
00312 memset(XTv, 0, sizeof(float64_t)*prob->n);
00313 for (int32_t i=0;i<sizeI;i++)
00314 {
00315 prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
00316
00317 if (prob->use_bias)
00318 XTv[n]+=v[i];
00319 }
00320 }
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340 #define GETI(i) (prob->y[i])
00341
00342
00343 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class, double *weighted_C, double epsilon, int max_it)
00344 {
00345 this->w_size = prob->n;
00346 this->l = prob->l;
00347 this->nr_class = n_class;
00348 this->eps = epsilon;
00349 this->max_iter = max_it;
00350 this->prob = p;
00351 this->C = weighted_C;
00352 this->B = SG_MALLOC(double, nr_class);
00353 this->G = SG_MALLOC(double, nr_class);
00354 }
00355
00356 Solver_MCSVM_CS::~Solver_MCSVM_CS()
00357 {
00358 SG_FREE(B);
00359 SG_FREE(G);
00360 }
00361
00362 int compare_double(const void *a, const void *b)
00363 {
00364 if(*(double *)a > *(double *)b)
00365 return -1;
00366 if(*(double *)a < *(double *)b)
00367 return 1;
00368 return 0;
00369 }
00370
00371 void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
00372 {
00373 int r;
00374 double *D=CMath::clone_vector(B, active_i);
00375
00376 if(yi < active_i)
00377 D[yi] += A_i*C_yi;
00378 qsort(D, active_i, sizeof(double), compare_double);
00379
00380 double beta = D[0] - A_i*C_yi;
00381 for(r=1;r<active_i && beta<r*D[r];r++)
00382 beta += D[r];
00383
00384 beta /= r;
00385 for(r=0;r<active_i;r++)
00386 {
00387 if(r == yi)
00388 alpha_new[r] = CMath::min(C_yi, (beta-B[r])/A_i);
00389 else
00390 alpha_new[r] = CMath::min((double)0, (beta - B[r])/A_i);
00391 }
00392 SG_FREE(D);
00393 }
00394
00395 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
00396 {
00397 double bound = 0;
00398 if(m == yi)
00399 bound = C[GETI(i)];
00400 if(alpha_i == bound && G[m] < minG)
00401 return true;
00402 return false;
00403 }
00404
00405 void Solver_MCSVM_CS::Solve(double *w)
00406 {
00407 int i, m, s;
00408 int iter = 0;
00409 double *alpha = SG_MALLOC(double, l*nr_class);
00410 double *alpha_new = SG_MALLOC(double, nr_class);
00411 int *index = SG_MALLOC(int, l);
00412 double *QD = SG_MALLOC(double, l);
00413 int *d_ind = SG_MALLOC(int, nr_class);
00414 double *d_val = SG_MALLOC(double, nr_class);
00415 int *alpha_index = SG_MALLOC(int, nr_class*l);
00416 int *y_index = SG_MALLOC(int, l);
00417 int active_size = l;
00418 int *active_size_i = SG_MALLOC(int, l);
00419 double eps_shrink = CMath::max(10.0*eps, 1.0);
00420 bool start_from_all = true;
00421
00422 for(i=0;i<l*nr_class;i++)
00423 alpha[i] = 0;
00424 for(i=0;i<w_size*nr_class;i++)
00425 w[i] = 0;
00426 for(i=0;i<l;i++)
00427 {
00428 for(m=0;m<nr_class;m++)
00429 alpha_index[i*nr_class+m] = m;
00430
00431 QD[i] = prob->x->dot(i, prob->x,i);
00432
00433 active_size_i[i] = nr_class;
00434 y_index[i] = prob->y[i];
00435 index[i] = i;
00436 }
00437
00438 while(iter < max_iter)
00439 {
00440 double stopping = -CMath::INFTY;
00441 for(i=0;i<active_size;i++)
00442 {
00443 int j = i+rand()%(active_size-i);
00444 CMath::swap(index[i], index[j]);
00445 }
00446 for(s=0;s<active_size;s++)
00447 {
00448 i = index[s];
00449 double Ai = QD[i];
00450 double *alpha_i = &alpha[i*nr_class];
00451 int *alpha_index_i = &alpha_index[i*nr_class];
00452
00453 if(Ai > 0)
00454 {
00455 for(m=0;m<active_size_i[i];m++)
00456 G[m] = 1;
00457 if(y_index[i] < active_size_i[i])
00458 G[y_index[i]] = 0;
00459
00460 SG_SNOTIMPLEMENTED;
00461
00462
00463
00464
00465
00466
00467
00468
00469
00470
00471
00472 double minG = CMath::INFTY;
00473 double maxG = -CMath::INFTY;
00474 for(m=0;m<active_size_i[i];m++)
00475 {
00476 if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
00477 minG = G[m];
00478 if(G[m] > maxG)
00479 maxG = G[m];
00480 }
00481 if(y_index[i] < active_size_i[i])
00482 if(alpha_i[prob->y[i]] < C[GETI(i)] && G[y_index[i]] < minG)
00483 minG = G[y_index[i]];
00484
00485 for(m=0;m<active_size_i[i];m++)
00486 {
00487 if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
00488 {
00489 active_size_i[i]--;
00490 while(active_size_i[i]>m)
00491 {
00492 if(!be_shrunk(i, active_size_i[i], y_index[i],
00493 alpha_i[alpha_index_i[active_size_i[i]]], minG))
00494 {
00495 CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
00496 CMath::swap(G[m], G[active_size_i[i]]);
00497 if(y_index[i] == active_size_i[i])
00498 y_index[i] = m;
00499 else if(y_index[i] == m)
00500 y_index[i] = active_size_i[i];
00501 break;
00502 }
00503 active_size_i[i]--;
00504 }
00505 }
00506 }
00507
00508 if(active_size_i[i] <= 1)
00509 {
00510 active_size--;
00511 CMath::swap(index[s], index[active_size]);
00512 s--;
00513 continue;
00514 }
00515
00516 if(maxG-minG <= 1e-12)
00517 continue;
00518 else
00519 stopping = CMath::CMath::max(maxG - minG, stopping);
00520
00521 for(m=0;m<active_size_i[i];m++)
00522 B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;
00523
00524 solve_sub_problem(Ai, y_index[i], C[GETI(i)], active_size_i[i], alpha_new);
00525 int nz_d = 0;
00526 for(m=0;m<active_size_i[i];m++)
00527 {
00528 double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
00529 alpha_i[alpha_index_i[m]] = alpha_new[m];
00530 if(fabs(d) >= 1e-12)
00531 {
00532 d_ind[nz_d] = alpha_index_i[m];
00533 d_val[nz_d] = d;
00534 nz_d++;
00535 }
00536 }
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547 }
00548 }
00549
00550 iter++;
00551 if(iter % 10 == 0)
00552 {
00553 SG_SINFO(".");
00554 }
00555
00556 if(stopping < eps_shrink)
00557 {
00558 if(stopping < eps && start_from_all == true)
00559 break;
00560 else
00561 {
00562 active_size = l;
00563 for(i=0;i<l;i++)
00564 active_size_i[i] = nr_class;
00565 SG_SINFO("*");
00566 eps_shrink = CMath::max(eps_shrink/2, eps);
00567 start_from_all = true;
00568 }
00569 }
00570 else
00571 start_from_all = false;
00572 }
00573
00574 SG_SINFO("\noptimization finished, #iter = %d\n",iter);
00575 if (iter >= max_iter)
00576 SG_SINFO("Warning: reaching max number of iterations\n");
00577
00578
00579 double v = 0;
00580 int nSV = 0;
00581 for(i=0;i<w_size*nr_class;i++)
00582 v += w[i]*w[i];
00583 v = 0.5*v;
00584 for(i=0;i<l*nr_class;i++)
00585 {
00586 v += alpha[i];
00587 if(fabs(alpha[i]) > 0)
00588 nSV++;
00589 }
00590 for(i=0;i<l;i++)
00591 v -= alpha[i*nr_class+prob->y[i]];
00592 SG_SINFO("Objective value = %lf\n",v);
00593 SG_SINFO("nSV = %d\n",nSV);
00594
00595 delete [] alpha;
00596 delete [] alpha_new;
00597 delete [] index;
00598 delete [] QD;
00599 delete [] d_ind;
00600 delete [] d_val;
00601 delete [] alpha_index;
00602 delete [] y_index;
00603 delete [] active_size_i;
00604 }
00605
00606
00607
00608
00609
00610 void destroy_model(struct model *model_)
00611 {
00612 SG_FREE(model_->w);
00613 SG_FREE(model_->label);
00614 SG_FREE(model_);
00615 }
00616
00617 void destroy_param(parameter* param)
00618 {
00619 SG_FREE(param->weight_label);
00620 SG_FREE(param->weight);
00621 }
00622 #endif //HAVE_LAPACK
00623 #endif // DOXYGEN_SHOULD_SKIP_THIS