#include <shogun/lib/config.h>

#ifdef HAVE_LAPACK
#include <shogun/io/SGIO.h>
#include <shogun/lib/Signal.h>
#include <shogun/lib/Time.h>
#include <shogun/base/Parameter.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVM_linear.h>
#include <shogun/classifier/svm/Tron.h>
#include <shogun/features/DotFeatures.h>

using namespace shogun;

CLibLinear::CLibLinear()
: CLinearMachine()
{
    init();
}

CLibLinear::CLibLinear(LIBLINEAR_SOLVER_TYPE l)
: CLinearMachine()
{
    init();
    liblinear_solver_type=l;
}

CLibLinear::CLibLinear(
    float64_t C, CDotFeatures* traindat, CLabels* trainlab)
: CLinearMachine()
{
    init();
    C1=C;
    C2=C;
    use_bias=true;

    set_features(traindat);
    set_labels(trainlab);
    init_linear_term();
}
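
// Minimal usage sketch (illustration only, not part of the original file;
// it assumes features and labels have already been wrapped in Shogun
// containers):
//
//   CLibLinear* svm = new CLibLinear(1.0, features, labels);
//   svm->set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
//   svm->set_epsilon(1e-5);
//   svm->train();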

void CLibLinear::init()
{
    liblinear_solver_type=L2R_L1LOSS_SVC_DUAL;
    use_bias=false;
    C1=1;
    C2=1;
    set_max_iterations();
    epsilon=1e-5;

    SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
    SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
    SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
        MS_NOT_AVAILABLE);
    SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
    SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_linear_term, "linear_term", "Linear term.", MS_NOT_AVAILABLE);
    SG_ADD((machine_int_t*) &liblinear_solver_type, "liblinear_solver_type",
        "Type of LibLinear solver.", MS_NOT_AVAILABLE);
}

CLibLinear::~CLibLinear()
{
    m_linear_term.destroy_vector();
}

bool CLibLinear::train_machine(CFeatures* data)
{
    CSignal::clear_cancel();
    ASSERT(labels);

    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }
    ASSERT(features);
    ASSERT(labels->is_two_class_labeling());

    int32_t num_train_labels=labels->get_num_labels();
    int32_t num_feat=features->get_dim_feature_space();
    int32_t num_vec=features->get_num_vectors();

    if (liblinear_solver_type == L1R_L2LOSS_SVC ||
        liblinear_solver_type == L1R_LR)
    {
        if (num_feat!=num_train_labels)
        {
            SG_ERROR("L1 methods require the data to be transposed: "
                "number of features %d does not match number of "
                "training labels %d\n",
                num_feat, num_train_labels);
        }
        CMath::swap(num_feat, num_vec);
    }
    else
    {
        if (num_vec!=num_train_labels)
        {
            SG_ERROR("number of vectors %d does not match "
                "number of training labels %d\n",
                num_vec, num_train_labels);
        }
    }
    SG_FREE(w);
    w=SG_MALLOC(float64_t, num_feat + (use_bias ? 1 : 0));
    w_dim=num_feat;

    problem prob;
    prob.n = use_bias ? w_dim+1 : w_dim;
    memset(w, 0, sizeof(float64_t)*prob.n);
    prob.l=num_vec;
    prob.x=features;
    prob.y=SG_MALLOC(int, prob.l);
    prob.use_bias=use_bias;

    for (int32_t i=0; i<prob.l; i++)
        prob.y[i]=labels->get_int_label(i);

    int pos = 0;
    int neg = 0;
    for (int i=0; i<prob.l; i++)
    {
        if (prob.y[i]==+1)
            pos++;
    }
    neg = prob.l - pos;

    SG_INFO("%d training points %d dims\n", prob.l, prob.n);

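    // For the primal solvers below, the stopping tolerance is rescaled by
    // the class balance, eps*min(pos,neg)/l, matching LibLinear's default
    // stopping criterion.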
    function *fun_obj=NULL;
    double Cp=C1;
    double Cn=C2;
    switch (liblinear_solver_type)
    {
        case L2R_LR:
        {
            fun_obj=new l2r_lr_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            SG_DEBUG("starting L2R_LR training via tron\n");
            tron_obj.tron(w, max_train_time);
            SG_DEBUG("done with tron\n");
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC:
        {
            fun_obj=new l2r_l2_svc_fun(&prob, Cp, Cn);
            CTron tron_obj(fun_obj, epsilon*CMath::min(pos,neg)/prob.l, max_iterations);
            tron_obj.tron(w, max_train_time);
            delete fun_obj;
            break;
        }
        case L2R_L2LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
            break;
        case L2R_L1LOSS_SVC_DUAL:
            solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
            break;
        case L1R_L2LOSS_SVC:
            solve_l1r_l2_svc(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        case L1R_LR:
            solve_l1r_lr(&prob, epsilon*CMath::min(pos,neg)/prob.l, Cp, Cn);
            break;
        case MCSVM_CS:
            SG_NOTIMPLEMENTED;
            break;
        default:
            SG_ERROR("Error: unknown solver_type\n");
            break;
    }

    if (use_bias)
        set_bias(w[w_dim]);
    else
        set_bias(0);

    SG_FREE(prob.y);

    return true;
}
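
// A coordinate descent algorithm for the L1- and L2-loss SVM dual
// problems (Hsieh et al., ICML 2008):
//
//   min_alpha  0.5 alpha^T (Q + D) alpha - e^T alpha
//   s.t.       0 <= alpha_i <= upper_bound_i
//
// where Q_ij = y_i y_j x_i^T x_j. For the L1-loss SVM, D is zero and
// upper_bound_i = C_{y_i}; for the L2-loss SVM, D_ii = 1/(2 C_{y_i}) and
// upper_bound_i = infinity. When m_linear_term is set, it replaces the
// -e^T alpha part of the objective.
//
// GETI(i) maps label y[i] in {-1,+1} to an index into the per-class
// diag/upper_bound arrays (0 for negative, 2 for positive examples).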
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l2r_l1l2_svc(
    const problem *prob, double eps, double Cp, double Cn, LIBLINEAR_SOLVER_TYPE st)
{
    int l = prob->l;
    int w_size = prob->n;
    int i, s, iter = 0;
    double C, d, G;
    double *QD = SG_MALLOC(double, l);
    int *index = SG_MALLOC(int, l);
    double *alpha = SG_MALLOC(double, l);
    int32_t *y = SG_MALLOC(int32_t, l);
    int active_size = l;

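    // PG: projected gradient, used for shrinking and stopping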
    double PG;
    double PGmax_old = CMath::INFTY;
    double PGmin_old = -CMath::INFTY;
    double PGmax_new, PGmin_new;

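    // diag/upper_bound encode the difference between the two duals: the
    // L2-loss default below uses D_ii = 0.5/C with unbounded alpha, while
    // L1-loss uses D = 0 with alpha bounded by C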
    double diag[3] = {0.5/Cn, 0, 0.5/Cp};
    double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
    if (st == L2R_L1LOSS_SVC_DUAL)
    {
        diag[0] = 0;
        diag[2] = 0;
        upper_bound[0] = Cn;
        upper_bound[2] = Cp;
    }

    int n = prob->n;

    if (prob->use_bias)
        n--;

    for(i=0; i<w_size; i++)
        w[i] = 0;

    for(i=0; i<l; i++)
    {
        alpha[i] = 0;
        if(prob->y[i] > 0)
            y[i] = +1;
        else
            y[i] = -1;

        QD[i] = diag[GETI(i)] + prob->x->dot(i, prob->x, i);
        index[i] = i;
    }

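    // outer loop: sweep over a randomly permuted active set, shrinking
    // variables at their bounds whose projected gradient violates the
    // thresholds from the previous sweep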
    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        PGmax_new = -CMath::INFTY;
        PGmin_new = CMath::INFTY;

        for (i=0; i<active_size; i++)
        {
            int j = i+rand()%(active_size-i);
            CMath::swap(index[i], index[j]);
        }

        for (s=0; s<active_size; s++)
        {
            i = index[s];
            int32_t yi = y[i];

            G = prob->x->dense_dot(i, w, n);
            if (prob->use_bias)
                G+=w[n];

            if (m_linear_term.vector)
                G = G*yi + m_linear_term.vector[i];
            else
                G = G*yi-1;

            C = upper_bound[GETI(i)];
            G += alpha[i]*diag[GETI(i)];

            PG = 0;
            if (alpha[i] == 0)
            {
                if (G > PGmax_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G < 0)
                    PG = G;
            }
            else if (alpha[i] == C)
            {
                if (G < PGmin_old)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
                else if (G > 0)
                    PG = G;
            }
            else
                PG = G;

            PGmax_new = CMath::max(PGmax_new, PG);
            PGmin_new = CMath::min(PGmin_new, PG);

            if (fabs(PG) > 1.0e-12)
            {
                double alpha_old = alpha[i];
                alpha[i] = CMath::min(CMath::max(alpha[i] - G/QD[i], 0.0), C);
                d = (alpha[i] - alpha_old)*yi;

                prob->x->add_to_dense_vec(d, i, w, n);

                if (prob->use_bias)
                    w[n]+=d;
            }
        }

        iter++;
        float64_t gap=PGmax_new - PGmin_new;
        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        if (gap <= eps)
        {
            if (active_size == l)
                break;
            else
            {
                active_size = l;
                PGmax_old = CMath::INFTY;
                PGmin_old = -CMath::INFTY;
                continue;
            }
        }
        PGmax_old = PGmax_new;
        PGmin_old = PGmin_new;
        if (PGmax_old <= 0)
            PGmax_old = CMath::INFTY;
        if (PGmin_old >= 0)
            PGmin_old = -CMath::INFTY;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if (iter >= max_iterations)
    {
        SG_WARNING("reaching max number of iterations\n"
            "Using -s 2 may be faster (also see liblinear FAQ)\n\n");
    }

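    // calculate objective value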
    double v = 0;
    int nSV = 0;
    for(i=0; i<w_size; i++)
        v += w[i]*w[i];
    for(i=0; i<l; i++)
    {
        v += alpha[i]*(alpha[i]*diag[GETI(i)] - 2);
        if(alpha[i] > 0)
            ++nSV;
    }
    SG_INFO("Objective value = %lf\n", v/2);
    SG_INFO("nSV = %d\n", nSV);

    // buffers were allocated with SG_MALLOC, so release them with SG_FREE
    SG_FREE(QD);
    SG_FREE(alpha);
    SG_FREE(y);
    SG_FREE(index);
}
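
// A coordinate descent algorithm for L1-regularized L2-loss SVC
// (Yuan et al., JMLR 2010):
//
//   min_w  sum_j |w_j| + C sum_i max(0, 1 - y_i w^T x_i)^2
//
// The data must be passed in transposed (feature-major) form, which is
// why train_machine() requires transposed features for the L1 methods.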
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_l2_svc(
    problem *prob_col, double eps, double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double sigma = 0.01;
    double d, G_loss, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double d_old, d_diff;
    double loss_old=0, loss_new;
    double appxcond, cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *b = SG_MALLOC(double, l);
    double *xj_sq = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        b[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }

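    // precompute xj_sq[j] = sum_i C_i x_ij^2, used in the approximate
    // line-search condition; the bias column is an implicit all-ones
    // feature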
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_sq[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
                xj_sq[n] += C[GETI(ind)];
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
                xj_sq[j] += C[GETI(ind)]*val*val;
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            G_loss = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*y[ind];
                    }
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);

                while (x->get_next_feature(ind, val, iterator))
                {
                    if(b[ind] > 0)
                    {
                        double tmp = C[GETI(ind)]*val*y[ind];
                        G_loss -= tmp*b[ind];
                        H += tmp*val*y[ind];
                    }
                }
                x->free_feature_iterator(iterator);
            }

            G_loss *= 2;

            G = G_loss;
            H *= 2;
            H = CMath::max(H, 1e-12);

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

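            // obtain Newton direction d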
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            d_old = 0;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                d_diff = d_old - d;
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                appxcond = xj_sq[j]*d*d + G_loss*d + cond;
                if(appxcond <= 0)
                {
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                            b[ind] += d_diff*y[ind];
                        break;
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                            b[ind] += d_diff*val*y[ind];

                        x->free_feature_iterator(iterator);
                        break;
                    }
                }

                if(num_linesearch == 0)
                {
                    loss_old = 0;
                    loss_new = 0;

                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            if(b[ind] > 0)
                                loss_old += C[GETI(ind)]*b[ind]*b[ind];
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }
                else
                {
                    loss_new = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            double b_new = b[ind] + d_diff*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            double b_new = b[ind] + d_diff*val*y[ind];
                            b[ind] = b_new;
                            if(b_new > 0)
                                loss_new += C[GETI(ind)]*b_new*b_new;
                        }
                        x->free_feature_iterator(iterator);
                    }
                }

                cond = cond + loss_new - loss_old;
                if(cond <= 0)
                    break;
                else
                {
                    d_old = d;
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

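            // recompute b[] if line search takes too many steps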
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    b[i] = 1;

                for(int i=0; i<n; i++)
                {
                    if(w[i]==0)
                        continue;

                    iterator=x->get_feature_iterator(i);
                    while (x->get_next_feature(ind, val, iterator))
                        b[ind] -= w[i]*val*y[ind];
                    x->free_feature_iterator(iterator);
                }

                if (use_bias && w[n])
                {
                    for (ind=0; ind<l; ind++)
                        b[ind] -= w[n]*y[ind];
                }
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;

        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

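    // calculate objective value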
    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
        if(b[j] > 0)
            v += C[GETI(j)]*b[j]*b[j];

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers were allocated with SG_MALLOC, so release them with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(b);
    SG_FREE(xj_sq);
}
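
// A coordinate descent algorithm for L1-regularized logistic regression
// (Yuan et al., JMLR 2010):
//
//   min_w  sum_j |w_j| + C sum_i log(1 + exp(-y_i w^T x_i))
//
// As with the L1-regularized SVC above, the data must be in transposed
// (feature-major) form.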
#undef GETI
#define GETI(i) (y[i]+1)

void CLibLinear::solve_l1r_lr(
    const problem *prob_col, double eps,
    double Cp, double Cn)
{
    int l = prob_col->l;
    int w_size = prob_col->n;
    int j, s, iter = 0;
    int active_size = w_size;
    int max_num_linesearch = 20;

    double x_min = 0;
    double sigma = 0.01;
    double d, G, H;
    double Gmax_old = CMath::INFTY;
    double Gmax_new;
    double Gmax_init=0;
    double sum1, appxcond1;
    double sum2, appxcond2;
    double cond;

    int *index = SG_MALLOC(int, w_size);
    int32_t *y = SG_MALLOC(int32_t, l);
    double *exp_wTx = SG_MALLOC(double, l);
    double *exp_wTx_new = SG_MALLOC(double, l);
    double *xj_max = SG_MALLOC(double, w_size);
    double *C_sum = SG_MALLOC(double, w_size);
    double *xjneg_sum = SG_MALLOC(double, w_size);
    double *xjpos_sum = SG_MALLOC(double, w_size);

    CDotFeatures* x = prob_col->x;
    void* iterator;
    int32_t ind;
    float64_t val;

    double C[3] = {Cn,0,Cp};

    int n = prob_col->n;
    if (prob_col->use_bias)
        n--;

    for(j=0; j<l; j++)
    {
        exp_wTx[j] = 1;
        if(prob_col->y[j] > 0)
            y[j] = 1;
        else
            y[j] = -1;
    }
    for(j=0; j<w_size; j++)
    {
        w[j] = 0;
        index[j] = j;
        xj_max[j] = 0;
        C_sum[j] = 0;
        xjneg_sum[j] = 0;
        xjpos_sum[j] = 0;

        if (use_bias && j==n)
        {
            for (ind=0; ind<l; ind++)
            {
                x_min = CMath::min(x_min, 1.0);
                xj_max[j] = CMath::max(xj_max[j], 1.0);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)];
                else
                    xjpos_sum[j] += C[GETI(ind)];
            }
        }
        else
        {
            iterator=x->get_feature_iterator(j);
            while (x->get_next_feature(ind, val, iterator))
            {
                x_min = CMath::min(x_min, val);
                xj_max[j] = CMath::max(xj_max[j], val);
                C_sum[j] += C[GETI(ind)];
                if(y[ind] == -1)
                    xjneg_sum[j] += C[GETI(ind)]*val;
                else
                    xjpos_sum[j] += C[GETI(ind)]*val;
            }
            x->free_feature_iterator(iterator);
        }
    }

    CTime start_time;
    while (iter < max_iterations && !CSignal::cancel_computations())
    {
        if (max_train_time > 0 && start_time.cur_time_diff() > max_train_time)
            break;

        Gmax_new = 0;

        for(j=0; j<active_size; j++)
        {
            int i = j+rand()%(active_size-j);
            CMath::swap(index[i], index[j]);
        }

        for(s=0; s<active_size; s++)
        {
            j = index[s];
            sum1 = 0;
            sum2 = 0;
            H = 0;

            if (use_bias && j==n)
            {
                for (ind=0; ind<l; ind++)
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = 1.0/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
            }
            else
            {
                iterator=x->get_feature_iterator(j);
                while (x->get_next_feature(ind, val, iterator))
                {
                    double exp_wTxind = exp_wTx[ind];
                    double tmp1 = val/(1+exp_wTxind);
                    double tmp2 = C[GETI(ind)]*tmp1;
                    double tmp3 = tmp2*exp_wTxind;
                    sum2 += tmp2;
                    sum1 += tmp3;
                    H += tmp1*tmp3;
                }
                x->free_feature_iterator(iterator);
            }

            G = -sum2 + xjneg_sum[j];

            double Gp = G+1;
            double Gn = G-1;
            double violation = 0;
            if(w[j] == 0)
            {
                if(Gp < 0)
                    violation = -Gp;
                else if(Gn > 0)
                    violation = Gn;
                else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
                {
                    active_size--;
                    CMath::swap(index[s], index[active_size]);
                    s--;
                    continue;
                }
            }
            else if(w[j] > 0)
                violation = fabs(Gp);
            else
                violation = fabs(Gn);

            Gmax_new = CMath::max(Gmax_new, violation);

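            // obtain Newton direction d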
            if(Gp <= H*w[j])
                d = -Gp/H;
            else if(Gn >= H*w[j])
                d = -Gn/H;
            else
                d = -w[j];

            if(fabs(d) < 1.0e-12)
                continue;

            d = CMath::min(CMath::max(d,-10.0),10.0);

            double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
            int num_linesearch;
            for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
            {
                cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;

                if(x_min >= 0)
                {
                    double tmp = exp(d*xj_max[j]);
                    appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
                    appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
                    if(CMath::min(appxcond1,appxcond2) <= 0)
                    {
                        if (use_bias && j==n)
                        {
                            for (ind=0; ind<l; ind++)
                                exp_wTx[ind] *= exp(d);
                        }
                        else
                        {
                            iterator=x->get_feature_iterator(j);
                            while (x->get_next_feature(ind, val, iterator))
                                exp_wTx[ind] *= exp(d*val);
                            x->free_feature_iterator(iterator);
                        }
                        break;
                    }
                }

                cond += d*xjneg_sum[j];

                int i = 0;

                if (use_bias && j==n)
                {
                    for (ind=0; ind<l; ind++)
                    {
                        double exp_dx = exp(d);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                }
                else
                {
                    iterator=x->get_feature_iterator(j);
                    while (x->get_next_feature(ind, val, iterator))
                    {
                        double exp_dx = exp(d*val);
                        exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
                        cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
                        i++;
                    }
                    x->free_feature_iterator(iterator);
                }

                if(cond <= 0)
                {
                    i = 0;
                    if (use_bias && j==n)
                    {
                        for (ind=0; ind<l; ind++)
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(j);
                        while (x->get_next_feature(ind, val, iterator))
                        {
                            exp_wTx[ind] = exp_wTx_new[i];
                            i++;
                        }
                        x->free_feature_iterator(iterator);
                    }
                    break;
                }
                else
                {
                    d *= 0.5;
                    delta *= 0.5;
                }
            }

            w[j] += d;

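            // recompute exp_wTx[] if line search takes too many steps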
            if(num_linesearch >= max_num_linesearch)
            {
                SG_INFO("#");
                for(int i=0; i<l; i++)
                    exp_wTx[i] = 0;

                for(int i=0; i<w_size; i++)
                {
                    if(w[i]==0)
                        continue;

                    if (use_bias && i==n)
                    {
                        for (ind=0; ind<l; ind++)
                            exp_wTx[ind] += w[i];
                    }
                    else
                    {
                        iterator=x->get_feature_iterator(i);
                        while (x->get_next_feature(ind, val, iterator))
                            exp_wTx[ind] += w[i]*val;
                        x->free_feature_iterator(iterator);
                    }
                }

                for(int i=0; i<l; i++)
                    exp_wTx[i] = exp(exp_wTx[i]);
            }
        }

        if(iter == 0)
            Gmax_init = Gmax_new;
        iter++;
        SG_SABS_PROGRESS(Gmax_new, -CMath::log10(Gmax_new), -CMath::log10(Gmax_init), -CMath::log10(eps*Gmax_init), 6);

        if(Gmax_new <= eps*Gmax_init)
        {
            if(active_size == w_size)
                break;
            else
            {
                active_size = w_size;
                Gmax_old = CMath::INFTY;
                continue;
            }
        }

        Gmax_old = Gmax_new;
    }

    SG_DONE();
    SG_INFO("optimization finished, #iter = %d\n", iter);
    if(iter >= max_iterations)
        SG_WARNING("reaching max number of iterations\n");

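    // calculate objective value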
    double v = 0;
    int nnz = 0;
    for(j=0; j<w_size; j++)
    {
        if(w[j] != 0)
        {
            v += fabs(w[j]);
            nnz++;
        }
    }
    for(j=0; j<l; j++)
    {
        if(y[j] == 1)
            v += C[GETI(j)]*log(1+1/exp_wTx[j]);
        else
            v += C[GETI(j)]*log(1+exp_wTx[j]);
    }

    SG_INFO("Objective value = %lf\n", v);
    SG_INFO("#nonzeros/#features = %d/%d\n", nnz, w_size);

    // buffers were allocated with SG_MALLOC, so release them with SG_FREE
    SG_FREE(index);
    SG_FREE(y);
    SG_FREE(exp_wTx);
    SG_FREE(exp_wTx_new);
    SG_FREE(xj_max);
    SG_FREE(C_sum);
    SG_FREE(xjneg_sum);
    SG_FREE(xjpos_sum);
}

SGVector<float64_t> CLibLinear::get_linear_term()
{
    if (!m_linear_term.vlen || !m_linear_term.vector)
        SG_ERROR("Please assign linear term first!\n");

    return m_linear_term;
}

void CLibLinear::init_linear_term()
{
    if (!labels)
        SG_ERROR("Please assign labels first!\n");

    m_linear_term.destroy_vector();

    m_linear_term=SGVector<float64_t>(labels->get_num_labels());
    CMath::fill_vector(m_linear_term.vector, m_linear_term.vlen, -1.0);
}

#endif // HAVE_LAPACK