#include <shogun/lib/config.h>

#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/optimization/liblinear/tron.h>
#include <shogun/features/DotFeatures.h>
#include <shogun/labels/BinaryLabels.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
        MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear Term", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
        "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
}

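// Dispatcher for training: validates the binary labels and dot features,
// assembles a liblinear `problem` over the (possibly transposed) data and
// hands it to the solver selected by liblinear_solver_type.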
bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(m_labels);
    ASSERT(m_labels->get_label_type() == LT_BINARY);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);

    int32_t num_train_labels=m_labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
        (liblinear_solver_type == L1R_LR) )
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                "number of features %d does not match number of "
                "training labels %d\n",
                num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
        }
    }
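    // When a bias is used, one extra element is allocated behind the
    // advertised length num_feat: the solvers store the bias at w[w.vlen],
    // and it is read back below via set_bias(w[w.vlen]).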
    if (use_bias)
        w=SGVector<float64_t>(SG_MALLOC(float64_t, num_feat+1), num_feat);
    else
        w=SGVector<float64_t>(num_feat);

    problem prob;
    if (use_bias)
    {
        prob.n=w.vlen+1;
        memset(w.vector, 0, sizeof(float64_t)*(w.vlen+1));
    }
    else
    {
        prob.n=w.vlen;
        memset(w.vector, 0, sizeof(float64_t)*w.vlen);
    }
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(double, prob.l);
    float64_t* Cs=SG_MALLOC(float64_t, prob.l);
    prob.use_bias=use_bias;
    double Cp=C1;
    double Cn=C2;

    for (int32_t i=0; i<prob.l; i++)
    {
        prob.y[i]=((CBinaryLabels*) m_labels)->get_int_label(i);
        if (prob.y[i] == +1)
            Cs[i]=C1;
        else if (prob.y[i] == -1)
            Cs[i]=C2;
        else
            SG_ERROR("labels should be +1/-1 only\n");
    }

    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i] == +1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

    function *fun_obj=NULL;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w.vector, m_max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cs);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w.vector, m_max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
        {
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L1R_LR:
        {
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        }
        case L2R_LR_DUAL:
        {
            solve_l2r_lr_dual(&prob, epsilon, Cp, Cn);
            break;
        }
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w.vlen]);
    else
        set_bias(0);

    SG_FREE(prob.y);
    SG_FREE(Cs);

    return true;
}

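// A coordinate descent algorithm for L1-loss and L2-loss SVM dual problems
//
//  min_alpha  0.5 (alpha^T (Q + D) alpha) - e^T alpha
//    s.t.     0 <= alpha_i <= upper_bound_i
//
// where Q_ij = y_i y_j x_i^T x_j and D is a diagonal matrix.
//
// In the L1-SVM case: upper_bound_i = Cp if y_i = 1, Cn if y_i = -1, D_ii = 0.
// In the L2-SVM case: upper_bound_i = INF, D_ii = 1/(2 Cp) if y_i = 1,
// 1/(2 Cn) if y_i = -1.
//
// Given x, y, Cp, Cn and the stopping tolerance eps, the solution is put in w.
//
// The GETI macro maps label y[i] in {-1,+1} to index {0,2}, so the
// three-element arrays below hold the per-class constants (slot 1 is unused).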
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l2r_l1l2_svc(
    const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

    // PG: projected gradient, used for shrinking and stopping
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

    // default constants correspond to the L2-loss case
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;
    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;

        QD[i] = diag[GETI(i)];
        QD[i] += prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

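    // Outer loop: dual coordinate descent over a randomly permuted active
    // set. Coordinates whose projected gradient (PG) shows they are already
    // optimal are shrunk from the active set; once the PG gap on the full
    // set drops below eps, we stop.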
    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w.vector, n);
            if (prob->use_bias)
                G+=w.vector[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w.vector, n);

                if (prob->use_bias)
                    w.vector[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if (gap <= eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\n"
            "Using -s 2 may be faster (also see liblinear FAQ)\n\n");
    }

    // calculate objective value
    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w.vector[i]*w.vector[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2);
    SG_INFO("nSV = %d\n", nSV);

    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

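// A coordinate descent algorithm for L1-regularized L2-loss SVC
//
//  min_w  \sum |w_j| + C \sum max(0, 1 - y_i w^T x_i)^2
//
// Given x, y, Cp, Cn and the stopping tolerance eps, the solution is put
// in w. Note that prob_col holds the data in transposed (column) order,
// so the feature iterators below walk one feature across all examples.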
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init = 0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l); // b[i] = 1 - y_i w^T x_i
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

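    // Outer loop: coordinate descent over the weights. For each coordinate,
    // the gradient G and Hessian H of the loss along that coordinate are
    // accumulated, a Newton step d on the L1-regularized model is taken,
    // and a back-off line search enforces the sufficient-decrease condition;
    // provably optimal coordinates are shrunk from the active set.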
    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute b[] if the line search took too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w.vector[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w.vector[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w.vector[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w.vector[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
            -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

    // calculate objective value
    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}

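// A coordinate descent algorithm for L1-regularized logistic regression
//
//  min_w  \sum |w_j| + C \sum log(1 + exp(-y_i w^T x_i))
//
// Given x, y, Cp, Cn and the stopping tolerance eps, the solution is put
// in w. As above, prob_col holds the data in transposed (column) order.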
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init = 0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn, 0, Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w.vector[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

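    // Outer loop: coordinate descent with a one-dimensional Newton step and
    // line search per weight. exp_wTx caches exp(w^T x_i) for every example,
    // so the gradient/Hessian sums and the line search touch only one
    // feature column at a time.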
    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w.vector[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w.vector[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

            // obtain Newton direction d
            if(Gp <= H*w.vector[j])
                d = -Gp/H;
            else if(Gn >= H*w.vector[j])
                d = -Gn/H;
            else
                d = -w.vector[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d, -10.0), 10.0);

            double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1, appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w.vector[j] += d;

            // recompute exp_wTx[] if the line search took too many steps
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w.vector[i]==0)
                        continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w.vector[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w.vector[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new),
            -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

    // calculate objective value
    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
        if(w.vector[j] != 0)
        {
            v += fabs(w.vector[j]);
            nnz++;
        }
    for(j=0; j<l; j++)
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

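// A coordinate descent algorithm for the dual of L2-regularized logistic
// regression problems
//
//  min_alpha  0.5 (alpha^T Q alpha) + \sum alpha_i log(alpha_i)
//             + (upper_bound_i - alpha_i) log(upper_bound_i - alpha_i)
//    s.t.     0 <= alpha_i <= upper_bound_i
//
// where Q_ij = y_i y_j x_i^T x_j, upper_bound_i = Cp if y_i = 1 and Cn
// otherwise. Each one-variable subproblem is solved by a modified Newton
// method (see Algorithm 5 of Yu, Huang and Lin, "Dual coordinate descent
// methods for logistic regression and maximum entropy models").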
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l2r_lr_dual(const problem *prob, double eps, double Cp, double Cn)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double *xTx = SG_MALLOC(double, l);
    int max_iter = 1000;
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, 2*l); // stores alpha and C - alpha
    int32_t *y = SG_MALLOC(int32_t, l);
    int max_inner_iter = 100; // for the inner Newton iteration
    double innereps = 1e-2;
    double innereps_min = CMath::min(1e-8, eps);
    double upper_bound[3] = {Cn, 0, Cp};
    double Gmax_init = 0;

    // as in the other solvers, the bias term lives in the extra element
    // behind the feature weights
    int n = prob->n;
    if (prob->use_bias)
        n--;

    for(i=0; i<l; i++)
    {
        if(prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;
    }

    // initialize alpha at a small feasible point:
    // 0 < alpha[2*i] < upper_bound[GETI(i)] and
    // alpha[2*i] + alpha[2*i+1] = upper_bound[GETI(i)]
    for(i=0; i<l; i++)
    {
        alpha[2*i] = CMath::min(0.001*upper_bound[GETI(i)], 1e-8);
        alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
    }

    for(i=0; i<w_size; i++)
        w[i] = 0;
    for(i=0; i<l; i++)
    {
        xTx[i] = prob->x->dot(i, prob->x, i);
        prob->x->add_to_dense_vec(y[i]*alpha[2*i], i, w.vector, n);

        if (prob->use_bias)
        {
            w.vector[n]+=y[i]*alpha[2*i];
            xTx[i]+=1;
        }
        index[i] = i;
    }

    while (iter < max_iter)
    {
        for (i=0; i<l; i++)
        {
            int j = i+rand()%(l-i);
            CMath::swap(index[i], index[j]);
        }
        int newton_iter = 0;
        double Gmax = 0;
        for (s=0; s<l; s++)
        {
            i = index[s];
            int32_t yi = y[i];
            double C = upper_bound[GETI(i)];
            double ywTx = 0, xisq = xTx[i];

            ywTx = prob->x->dense_dot(i, w.vector, n);
            if (prob->use_bias)
                ywTx+=w.vector[n];

            ywTx *= y[i];
            double a = xisq, b = ywTx;

            // decide whether to minimize g_1(z) or g_2(z)
            int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
            if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
            {
                ind1 = 2*i+1;
                ind2 = 2*i;
                sign = -1;
            }

            //  g_t(z) = z log(z) + (C-z) log(C-z) + 0.5 a (z-alpha_old)^2 + sign b (z-alpha_old)
            double alpha_old = alpha[ind1];
            double z = alpha_old;
            if(C - z < 0.5 * C)
                z = 0.1*z;
            double gp = a*(z-alpha_old)+sign*b+CMath::log(z/(C-z));
            Gmax = CMath::max(Gmax, CMath::abs(gp));

            // Newton method on the sub-problem
            const double eta = 0.1; // xi in the paper
            int inner_iter = 0;
            while (inner_iter <= max_inner_iter)
            {
                if(fabs(gp) < innereps)
                    break;
                double gpp = a + C/(C-z)/z;
                double tmpz = z - gp/gpp;
                if(tmpz <= 0)
                    z *= eta;
                else // tmpz in (0, C)
                    z = tmpz;
                gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
                newton_iter++;
                inner_iter++;
            }

            if(inner_iter > 0) // update w
            {
                alpha[ind1] = z;
                alpha[ind2] = C-z;

                prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i, w.vector, n);

                if (prob->use_bias)
                    w.vector[n]+=sign*(z-alpha_old)*yi;
            }
        }

        if(iter == 0)
            Gmax_init = Gmax;
        iter++;

        SG_SABS_PROGRESS(Gmax, -CMath::log10(Gmax),
            -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax < eps)
            break;

        if(newton_iter <= l/10)
            innereps = CMath::max(innereps_min, 0.1*innereps);
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iter)
        SG_WARNING("reaching max number of iterations\n"
            "Using -s 0 may be faster (also see FAQ)\n\n");

    // calculate objective value
    double v = 0;
    for(i=0; i<w_size; i++)
        v += w[i] * w[i];
    v *= 0.5;
    for(i=0; i<l; i++)
        v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
            - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
    SG_INFO("Objective value = %lf\n", v);

    SG_FREE(xTx);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}

void CLibLinear::set_linear_term(const SGVector<float64_t> linear_term)
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n");

    int32_t num_labels=m_labels->get_num_labels();

    if (num_labels!=linear_term.vlen)
    {
        SG_ERROR("Number of labels (%d) does not match number"
            " of entries (%d) in linear term\n", num_labels,
            linear_term.vlen);
    }

    m_linear_term=linear_term;
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!m_labels)
        SG_ERROR("Please assign labels first!\n");

    m_linear_term=SGVector<float64_t>(m_labels->get_num_labels());
    SGVector<float64_t>::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}