00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include <shogun/lib/config.h>
00034 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00035 #include <math.h>
00036 #include <stdio.h>
00037 #include <stdlib.h>
00038 #include <string.h>
00039 #include <stdarg.h>
00040
00041 #include <shogun/mathematics/Math.h>
00042 #include <shogun/optimization/liblinear/shogun_liblinear.h>
00043 #include <shogun/optimization/liblinear/tron.h>
00044 #include <shogun/lib/Time.h>
00045 #include <shogun/lib/Signal.h>
00046
00047 using namespace shogun;
00048
00049 l2r_lr_fun::l2r_lr_fun(const problem *p, float64_t* Cs)
00050 {
00051 int l=p->l;
00052
00053 this->m_prob = p;
00054
00055 z = SG_MALLOC(double, l);
00056 D = SG_MALLOC(double, l);
00057 C = Cs;
00058 }
00059
00060 l2r_lr_fun::~l2r_lr_fun()
00061 {
00062 SG_FREE(z);
00063 SG_FREE(D);
00064 }
00065
00066
00067 double l2r_lr_fun::fun(double *w)
00068 {
00069 int i;
00070 double f=0;
00071 float64_t *y=m_prob->y;
00072 int l=m_prob->l;
00073 int32_t n=m_prob->n;
00074
00075 Xv(w, z);
00076 for(i=0;i<l;i++)
00077 {
00078 double yz = y[i]*z[i];
00079 if (yz >= 0)
00080 f += C[i]*log(1 + exp(-yz));
00081 else
00082 f += C[i]*(-yz+log(1 + exp(yz)));
00083 }
00084 f += 0.5 *SGVector<float64_t>::dot(w,w,n);
00085
00086 return(f);
00087 }
00088
00089 void l2r_lr_fun::grad(double *w, double *g)
00090 {
00091 int i;
00092 float64_t *y=m_prob->y;
00093 int l=m_prob->l;
00094 int w_size=get_nr_variable();
00095
00096 for(i=0;i<l;i++)
00097 {
00098 z[i] = 1/(1 + exp(-y[i]*z[i]));
00099 D[i] = z[i]*(1-z[i]);
00100 z[i] = C[i]*(z[i]-1)*y[i];
00101 }
00102 XTv(z, g);
00103
00104 for(i=0;i<w_size;i++)
00105 g[i] = w[i] + g[i];
00106 }
00107
/** @return number of optimization variables (feature dim, incl. bias slot if used) */
int l2r_lr_fun::get_nr_variable()
{
	return m_prob->n;
}
00112
00113 void l2r_lr_fun::Hv(double *s, double *Hs)
00114 {
00115 int i;
00116 int l=m_prob->l;
00117 int w_size=get_nr_variable();
00118 double *wa = SG_MALLOC(double, l);
00119
00120 Xv(s, wa);
00121 for(i=0;i<l;i++)
00122 wa[i] = C[i]*D[i]*wa[i];
00123
00124 XTv(wa, Hs);
00125 for(i=0;i<w_size;i++)
00126 Hs[i] = s[i] + Hs[i];
00127 SG_FREE(wa);
00128 }
00129
00130 void l2r_lr_fun::Xv(double *v, double *res_Xv)
00131 {
00132 int32_t l=m_prob->l;
00133 int32_t n=m_prob->n;
00134 float64_t bias=0;
00135
00136 if (m_prob->use_bias)
00137 {
00138 n--;
00139 bias=v[n];
00140 }
00141
00142 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00143 }
00144
00145 void l2r_lr_fun::XTv(double *v, double *res_XTv)
00146 {
00147 int l=m_prob->l;
00148 int32_t n=m_prob->n;
00149
00150 memset(res_XTv, 0, sizeof(double)*m_prob->n);
00151
00152 if (m_prob->use_bias)
00153 n--;
00154
00155 for (int32_t i=0;i<l;i++)
00156 {
00157 m_prob->x->add_to_dense_vec(v[i], i, res_XTv, n);
00158
00159 if (m_prob->use_bias)
00160 res_XTv[n]+=v[i];
00161 }
00162 }
00163
00164 l2r_l2_svc_fun::l2r_l2_svc_fun(const problem *p, double* Cs)
00165 {
00166 int l=p->l;
00167
00168 this->m_prob = p;
00169
00170 z = SG_MALLOC(double, l);
00171 D = SG_MALLOC(double, l);
00172 I = SG_MALLOC(int, l);
00173 C=Cs;
00174
00175 }
00176
00177 l2r_l2_svc_fun::~l2r_l2_svc_fun()
00178 {
00179 SG_FREE(z);
00180 SG_FREE(D);
00181 SG_FREE(I);
00182 }
00183
00184 double l2r_l2_svc_fun::fun(double *w)
00185 {
00186 int i;
00187 double f=0;
00188 float64_t *y=m_prob->y;
00189 int l=m_prob->l;
00190 int w_size=get_nr_variable();
00191
00192 Xv(w, z);
00193 for(i=0;i<l;i++)
00194 {
00195 z[i] = y[i]*z[i];
00196 double d = 1-z[i];
00197 if (d > 0)
00198 f += C[i]*d*d;
00199 }
00200 f += 0.5*SGVector<float64_t>::dot(w, w, w_size);
00201
00202 return(f);
00203 }
00204
00205 void l2r_l2_svc_fun::grad(double *w, double *g)
00206 {
00207 int i;
00208 float64_t *y=m_prob->y;
00209 int l=m_prob->l;
00210 int w_size=get_nr_variable();
00211
00212 sizeI = 0;
00213 for (i=0;i<l;i++)
00214 if (z[i] < 1)
00215 {
00216 z[sizeI] = C[i]*y[i]*(z[i]-1);
00217 I[sizeI] = i;
00218 sizeI++;
00219 }
00220 subXTv(z, g);
00221
00222 for(i=0;i<w_size;i++)
00223 g[i] = w[i] + 2*g[i];
00224 }
00225
/** @return number of optimization variables (feature dim, incl. bias slot if used) */
int l2r_l2_svc_fun::get_nr_variable()
{
	return m_prob->n;
}
00230
00231 void l2r_l2_svc_fun::Hv(double *s, double *Hs)
00232 {
00233 int i;
00234 int l=m_prob->l;
00235 int w_size=get_nr_variable();
00236 double *wa = SG_MALLOC(double, l);
00237
00238 subXv(s, wa);
00239 for(i=0;i<sizeI;i++)
00240 wa[i] = C[I[i]]*wa[i];
00241
00242 subXTv(wa, Hs);
00243 for(i=0;i<w_size;i++)
00244 Hs[i] = s[i] + 2*Hs[i];
00245 SG_FREE(wa);
00246 }
00247
00248 void l2r_l2_svc_fun::Xv(double *v, double *res_Xv)
00249 {
00250 int32_t l=m_prob->l;
00251 int32_t n=m_prob->n;
00252 float64_t bias=0;
00253
00254 if (m_prob->use_bias)
00255 {
00256 n--;
00257 bias=v[n];
00258 }
00259
00260 m_prob->x->dense_dot_range(res_Xv, 0, l, NULL, v, n, bias);
00261 }
00262
00263 void l2r_l2_svc_fun::subXv(double *v, double *res_Xv)
00264 {
00265 int32_t n=m_prob->n;
00266 float64_t bias=0;
00267
00268 if (m_prob->use_bias)
00269 {
00270 n--;
00271 bias=v[n];
00272 }
00273
00274 m_prob->x->dense_dot_range_subset(I, sizeI, res_Xv, NULL, v, n, bias);
00275
00276
00277
00278
00279
00280
00281
00282
00283 }
00284
00285 void l2r_l2_svc_fun::subXTv(double *v, double *XTv)
00286 {
00287 int32_t n=m_prob->n;
00288
00289 if (m_prob->use_bias)
00290 n--;
00291
00292 memset(XTv, 0, sizeof(float64_t)*m_prob->n);
00293 for (int32_t i=0;i<sizeI;i++)
00294 {
00295 m_prob->x->add_to_dense_vec(v[i], I[i], XTv, n);
00296
00297 if (m_prob->use_bias)
00298 XTv[n]+=v[i];
00299 }
00300 }
00301
/** Construct the L2-regularized L2-loss SVR objective.
 *
 * @param prob  training problem (borrowed)
 * @param Cs    per-example cost weights (borrowed)
 * @param p     epsilon-insensitive tube width
 */
l2r_l2_svr_fun::l2r_l2_svr_fun(const problem *prob, double *Cs, double p):
	l2r_l2_svc_fun(prob, Cs)
{
	m_p = p;
}
00307
00308 double l2r_l2_svr_fun::fun(double *w)
00309 {
00310 int i;
00311 double f=0;
00312 double *y=m_prob->y;
00313 int l=m_prob->l;
00314 int w_size=get_nr_variable();
00315 double d;
00316
00317 Xv(w, z);
00318
00319 for(i=0;i<w_size;i++)
00320 f += w[i]*w[i];
00321 f /= 2;
00322 for(i=0;i<l;i++)
00323 {
00324 d = z[i] - y[i];
00325 if(d < -m_p)
00326 f += C[i]*(d+m_p)*(d+m_p);
00327 else if(d > m_p)
00328 f += C[i]*(d-m_p)*(d-m_p);
00329 }
00330
00331 return(f);
00332 }
00333
00334 void l2r_l2_svr_fun::grad(double *w, double *g)
00335 {
00336 int i;
00337 double *y=m_prob->y;
00338 int l=m_prob->l;
00339 int w_size=get_nr_variable();
00340 double d;
00341
00342 sizeI = 0;
00343 for(i=0;i<l;i++)
00344 {
00345 d = z[i] - y[i];
00346
00347
00348 if(d < -m_p)
00349 {
00350 z[sizeI] = C[i]*(d+m_p);
00351 I[sizeI] = i;
00352 sizeI++;
00353 }
00354 else if(d > m_p)
00355 {
00356 z[sizeI] = C[i]*(d-m_p);
00357 I[sizeI] = i;
00358 sizeI++;
00359 }
00360
00361 }
00362 subXTv(z, g);
00363
00364 for(i=0;i<w_size;i++)
00365 g[i] = w[i] + 2*g[i];
00366 }
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00387 #define GETI(i) (prob->y[i])
00388
00389
00390 Solver_MCSVM_CS::Solver_MCSVM_CS(const problem *p, int n_class,
00391 double *weighted_C, double *w0_reg,
00392 double epsilon, int max_it, double max_time,
00393 mcsvm_state* given_state)
00394 {
00395 this->w_size = p->n;
00396 this->l = p->l;
00397 this->nr_class = n_class;
00398 this->eps = epsilon;
00399 this->max_iter = max_it;
00400 this->prob = p;
00401 this->C = weighted_C;
00402 this->w0 = w0_reg;
00403 this->max_train_time = max_time;
00404 this->state = given_state;
00405 }
00406
/** Destructor: intentionally empty — all buffers live in the caller-owned
 *  mcsvm_state, so nothing is freed here. */
Solver_MCSVM_CS::~Solver_MCSVM_CS()
{
}
00410
00411 int compare_double(const void *a, const void *b)
00412 {
00413 if(*(double *)a > *(double *)b)
00414 return -1;
00415 if(*(double *)a < *(double *)b)
00416 return 1;
00417 return 0;
00418 }
00419
/** Exactly solve the per-example Crammer-Singer sub-problem over the
 *  active classes, producing new dual variables alpha_new.
 *
 * @param A_i        Q_ii (x_i'x_i, plus 1 with bias)
 * @param yi         position of the true label within the active set
 * @param C_yi       regularization constant for example i's class
 * @param active_i   number of active classes for this example
 * @param alpha_new  output: new alpha values, length active_i
 */
void Solver_MCSVM_CS::solve_sub_problem(double A_i, int yi, double C_yi, int active_i, double *alpha_new)
{
	int r;
	// work on a copy of B so the caller's buffer stays intact
	double *D=SGVector<float64_t>::clone_vector(state->B, active_i);

	if(yi < active_i)
		D[yi] += A_i*C_yi;
	// descending sort (compare_double orders largest-first)
	qsort(D, active_i, sizeof(double), compare_double);

	// find beta = (sum of the r largest D values - A_i*C_yi) / r such that
	// beta >= r*D[r] fails, i.e. the KKT threshold for the clipped solution
	double beta = D[0] - A_i*C_yi;
	for(r=1;r<active_i && beta<r*D[r];r++)
		beta += D[r];

	beta /= r;
	// clip: alpha_y <= C_yi for the true class, alpha_m <= 0 otherwise
	for(r=0;r<active_i;r++)
	{
		if(r == yi)
			alpha_new[r] = CMath::min(C_yi, (beta-state->B[r])/A_i);
		else
			alpha_new[r] = CMath::min((double)0, (beta - state->B[r])/A_i);
	}
	SG_FREE(D);
}
00443
00444 bool Solver_MCSVM_CS::be_shrunk(int i, int m, int yi, double alpha_i, double minG)
00445 {
00446 double bound = 0;
00447 if(m == yi)
00448 bound = C[int32_t(GETI(i))];
00449 if(alpha_i == bound && state->G[m] < minG)
00450 return true;
00451 return false;
00452 }
00453
/** Dual coordinate-descent solver for the Crammer-Singer multiclass SVM
 *  with per-example active-set shrinking.
 *
 * State (w, alpha, active sets, ...) lives in the caller-owned mcsvm_state,
 * so repeated calls warm-start from the previous solution. Terminates when
 * the stopping criterion drops below eps on a full pass, on max_iter, on
 * max_train_time, or on user cancellation.
 */
void Solver_MCSVM_CS::solve()
{
	int i, m, s, k;
	int iter = 0;
	double *w,*B,*G,*alpha,*alpha_new,*QD,*d_val;
	int *index,*d_ind,*alpha_index,*y_index,*active_size_i;

	// lazily allocate all solver buffers once; reused across warm starts
	if (!state->allocated)
	{
		state->w = SG_CALLOC(double, nr_class*w_size);
		state->B = SG_CALLOC(double, nr_class);
		state->G = SG_CALLOC(double, nr_class);
		state->alpha = SG_CALLOC(double, l*nr_class);
		state->alpha_new = SG_CALLOC(double, nr_class);
		state->index = SG_CALLOC(int, l);
		state->QD = SG_CALLOC(double, l);
		state->d_ind = SG_CALLOC(int, nr_class);
		state->d_val = SG_CALLOC(double, nr_class);
		state->alpha_index = SG_CALLOC(int, nr_class*l);
		state->y_index = SG_CALLOC(int, l);
		state->active_size_i = SG_CALLOC(int, l);
		state->allocated = true;
	}
	// local aliases into the persistent state
	w = state->w;
	B = state->B;
	G = state->G;
	alpha = state->alpha;
	alpha_new = state->alpha_new;
	index = state->index;
	QD = state->QD;
	d_ind = state->d_ind;
	d_val = state->d_val;
	alpha_index = state->alpha_index;
	y_index = state->y_index;
	active_size_i = state->active_size_i;

	// dense scratch copy of one example's features
	double* tx = SG_MALLOC(double, w_size);
	int dim = prob->x->get_dim_feature_space();

	int active_size = l;
	// shrinking tolerance starts loose and is halved until it reaches eps
	double eps_shrink = CMath::max(10.0*eps, 1.0);
	bool start_from_all = true;
	CTime start_time;

	// one-time initialization: diagonal Q_ii, full active sets, identity permutation
	if (!state->inited)
	{
		for(i=0;i<l;i++)
		{
			for(m=0;m<nr_class;m++)
				alpha_index[i*nr_class+m] = m;

			QD[i] = prob->x->dot(i, prob->x,i);
			if (prob->use_bias)
				QD[i] += 1.0;  // bias modeled as an extra all-ones feature

			active_size_i[i] = nr_class;
			y_index[i] = prob->y[i];
			index[i] = i;
		}
		state->inited = true;
	}

	while(iter < max_iter && !CSignal::cancel_computations())
	{
		double stopping = -CMath::INFTY;
		// Fisher-Yates shuffle of the active examples
		for(i=0;i<active_size;i++)
		{
			int j = i+rand()%(active_size-i);
			CMath::swap(index[i], index[j]);
		}
		for(s=0;s<active_size;s++)
		{
			i = index[s];
			double Ai = QD[i];
			double *alpha_i = &alpha[i*nr_class];
			int *alpha_index_i = &alpha_index[i*nr_class];

			if(Ai > 0)
			{
				// gradient over active classes: G_m = 1 - delta(m, y_i) + w_m'x_i
				for(m=0;m<active_size_i[i];m++)
					G[m] = 1;
				if(y_index[i] < active_size_i[i])
					G[y_index[i]] = 0;

				memset(tx,0,dim*sizeof(double));
				prob->x->add_to_dense_vec(1.0,i,tx,dim);
				for (k=0; k<dim; k++)
				{
					if (tx[k]==0.0)
						continue;

					double* w_i = &w[k*nr_class];
					for (m=0; m<active_size_i[i]; m++)
						G[m] += w_i[alpha_index_i[m]]*tx[k];
				}

				if (prob->use_bias)
				{
					// bias row of w contributes once per class
					double *w_i = &w[(w_size-1)*nr_class];
					for(m=0; m<active_size_i[i]; m++)
						G[m] += w_i[alpha_index_i[m]];
				}
				if (w0)
				{
					// optional fixed regularization offset weights
					for (k=0; k<dim; k++)
					{
						double *w0_i = &w0[k*nr_class];
						for(m=0; m<active_size_i[i]; m++)
							G[m] += w0_i[alpha_index_i[m]];
					}
				}

				// min/max gradient over classes whose alpha can still move
				double minG = CMath::INFTY;
				double maxG = -CMath::INFTY;
				for(m=0;m<active_size_i[i];m++)
				{
					if(alpha_i[alpha_index_i[m]] < 0 && G[m] < minG)
						minG = G[m];
					if(G[m] > maxG)
						maxG = G[m];
				}
				if(y_index[i] < active_size_i[i])
					if(alpha_i[int32_t(prob->y[i])] < C[int32_t(GETI(i))] && G[y_index[i]] < minG)
						minG = G[y_index[i]];

				// shrink bounded classes with small gradient; compact the
				// active prefix by swapping in non-shrinkable tail entries
				for(m=0;m<active_size_i[i];m++)
				{
					if(be_shrunk(i, m, y_index[i], alpha_i[alpha_index_i[m]], minG))
					{
						active_size_i[i]--;
						while(active_size_i[i]>m)
						{
							if(!be_shrunk(i, active_size_i[i], y_index[i],
											alpha_i[alpha_index_i[active_size_i[i]]], minG))
							{
								CMath::swap(alpha_index_i[m], alpha_index_i[active_size_i[i]]);
								CMath::swap(G[m], G[active_size_i[i]]);
								// keep y_index tracking the true class position
								if(y_index[i] == active_size_i[i])
									y_index[i] = m;
								else if(y_index[i] == m)
									y_index[i] = active_size_i[i];
								break;
							}
							active_size_i[i]--;
						}
					}
				}

				// a single active class means nothing left to optimize: retire example
				if(active_size_i[i] <= 1)
				{
					active_size--;
					CMath::swap(index[s], index[active_size]);
					s--;
					continue;
				}

				if(maxG-minG <= 1e-12)
					continue;
				else
					// NOTE(review): CMath::CMath::max works via the injected
					// class name; equivalent to CMath::max
					stopping = CMath::CMath::max(maxG - minG, stopping);

				// B_m = G_m - A_i * alpha_m, input to the closed-form sub-problem
				for(m=0;m<active_size_i[i];m++)
					B[m] = G[m] - Ai*alpha_i[alpha_index_i[m]] ;

				solve_sub_problem(Ai, y_index[i], C[int32_t(GETI(i))], active_size_i[i], alpha_new);
				// record only the alphas that actually changed
				int nz_d = 0;
				for(m=0;m<active_size_i[i];m++)
				{
					double d = alpha_new[m] - alpha_i[alpha_index_i[m]];
					alpha_i[alpha_index_i[m]] = alpha_new[m];
					if(fabs(d) >= 1e-12)
					{
						d_ind[nz_d] = alpha_index_i[m];
						d_val[nz_d] = d;
						nz_d++;
					}
				}

				// rank-one update of w with the alpha deltas
				memset(tx,0,dim*sizeof(double));
				prob->x->add_to_dense_vec(1.0,i,tx,dim);
				for (k=0; k<dim; k++)
				{
					if (tx[k]==0.0)
						continue;

					double* w_i = &w[k*nr_class];
					for (m=0; m<nz_d; m++)
						w_i[d_ind[m]] += d_val[m]*tx[k];
				}

				if (prob->use_bias)
				{
					double *w_i = &w[(w_size-1)*nr_class];
					for(m=0;m<nz_d;m++)
						w_i[d_ind[m]] += d_val[m];
				}

			}
		}

		iter++;

		// converged w.r.t. the (loose) shrinking tolerance?
		if(stopping < eps_shrink)
		{
			// only accept convergence when measured over ALL examples/classes
			if(stopping < eps && start_from_all == true)
				break;
			else
			{
				// un-shrink everything, tighten the tolerance, and re-check
				active_size = l;
				for(i=0;i<l;i++)
					active_size_i[i] = nr_class;

				eps_shrink = CMath::max(eps_shrink/2, eps);
				start_from_all = true;
			}
		}
		else
			start_from_all = false;

		if (max_train_time!=0.0 && max_train_time < start_time.cur_time_diff())
			break;
	}

	SG_SINFO("\noptimization finished, #iter = %d\n",iter);
	if (iter >= max_iter)
		SG_SINFO("Warning: reaching max number of iterations\n");

	SG_FREE(tx);
}
00693
00694
00695
00696
00697
00698 void destroy_model(struct model *model_)
00699 {
00700 SG_FREE(model_->w);
00701 SG_FREE(model_->label);
00702 SG_FREE(model_);
00703 }
00704
00705 void destroy_param(parameter* param)
00706 {
00707 SG_FREE(param->weight_label);
00708 SG_FREE(param->weight);
00709 }
00710 #endif // DOXYGEN_SHOULD_SKIP_THIS