23 using namespace shogun;
// Fragments of CLibLinear initialization and training: feature-type and
// label sanity checks, liblinear `problem` construction, and dispatch to
// the per-solver-type routines.
52 void CLibLinear::init()
// Training requires features supporting dot products (CDotFeatures).
86 SG_ERROR(
"Specified features are not of type CDotFeatures\n");
// L1-regularized solvers work on the transposed data matrix, so here the
// feature count must equal the number of training labels.
100 if (num_feat!=num_train_labels)
102 SG_ERROR(
"L1 methods require the data to be transposed: "
103 "number of features %d does not match number of "
104 "training labels %d\n",
105 num_feat, num_train_labels);
// Non-L1 solvers: one feature vector per training label.
112 if (num_vec!=num_train_labels)
114 SG_ERROR(
"number of vectors %d does not match "
115 "number of training labels %d\n",
116 num_vec, num_train_labels);
// Validate the binary +1/-1 label encoding while filling prob.y.
143 for (int32_t i=0; i<prob.l; i++)
148 else if (prob.y[i] == -1)
151 SG_ERROR(
"labels should be +1/-1 only\n");
156 for(
int i=0;i<prob.l;i++)
163 SG_INFO(
"%d training points %d dims\n", prob.l, prob.n);
// Objective-function object consumed by the TRON trust-region solver;
// which concrete subclass is created depends on the solver type.
// NOTE(review): presumably deleted after training — confirm in the
// omitted lines that fun_obj is freed on every path.
165 function *fun_obj=NULL;
170 fun_obj=
new l2r_lr_fun(&prob, Cs);
172 SG_DEBUG(
"starting L2R_LR training via tron\n");
180 fun_obj=
new l2r_l2_svc_fun(&prob, Cs);
206 solve_l2r_lr_dual(&prob,
epsilon, Cp, Cn);
// Default branch of the solver-type dispatch.
210 SG_ERROR(
"Error: unknown solver_type\n");
// GETI maps a +/-1 label to index {0,2} of the per-class parameter
// arrays (slot 1 is unused padding for label 0).
250 #define GETI(i) (y[i]+1)
// Dual coordinate descent for L2-regularized L1-/L2-loss SVC (follows
// LIBLINEAR; Hsieh et al. 2008): random sweeps over the active dual
// variables, updating one alpha[i] per step and maintaining w
// incrementally via add_to_dense_vec.
253 void CLibLinear::solve_l2r_l1l2_svc(
257 int w_size = prob->n;
// Projected-gradient extrema of the current sweep; drive the stopping
// test and (in omitted lines) active-set shrinking.
270 double PGmax_new, PGmin_new;
// L2-loss diagonal term 1/(2C) per class; middle slot unused.
273 double diag[3] = {0.5/Cn, 0, 0.5/Cp};
288 for(i=0; i<w_size; i++)
// Per-coordinate curvature: QD[i] = diag + x_i . x_i.
302 QD[i] = diag[
GETI(i)];
304 QD[i] += prob->x->
dot(i, prob->x,i);
// Fisher-Yates shuffle of the active index permutation.
318 for (i=0; i<active_size; i++)
320 int j = i+rand()%(active_size-i);
324 for (s=0;s<active_size;s++)
// Dual gradient for coordinate i: starts from y_i*(w . x_i), with the
// constant and regularization terms added in the surrounding
// (partially omitted) lines.
329 G = prob->x->dense_dot(i,
w.
vector, n);
338 C = upper_bound[
GETI(i)];
339 G += alpha[i]*diag[
GETI(i)];
354 else if (alpha[i] == C)
// Skip negligible projected gradients.
372 if(fabs(PG) > 1.0e-12)
374 double alpha_old = alpha[i];
// NOTE(review): the clipped alpha[i] update sits in omitted lines
// between 374 and 376; d is the signed step applied to w.
376 d = (alpha[i] - alpha_old)*yi;
378 prob->x->add_to_dense_vec(d, i,
w.
vector, n);
401 PGmax_old = PGmax_new;
402 PGmin_old = PGmin_new;
410 SG_INFO(
"optimization finished, #iter = %d\n",iter);
// NOTE(review): these adjacent literals concatenate to
// "...may be faster(also see liblinear FAQ)" — a space is missing
// before the '('.
413 SG_WARNING(
"reaching max number of iterations\nUsing -s 2 may be faster"
414 "(also see liblinear FAQ)\n\n");
// Dual objective: v = w.w + sum_i alpha_i*(alpha_i*diag_i - 2),
// reported as v/2.
421 for(i=0; i<w_size; i++)
425 v += alpha[i]*(alpha[i]*diag[
GETI(i)] - 2);
429 SG_INFO(
"Objective value = %lf\n",v/2);
// Same +/-1 label -> {0,2} index mapping as above (redefined per solver,
// LIBLINEAR convention).
450 #define GETI(i) (y[i]+1)
// Coordinate descent with Armijo-style line search for L1-regularized
// L2-loss SVC (follows LIBLINEAR; Yuan et al.). Operates on the
// column-major (transposed) problem: one weight coordinate j is updated
// per step, and b[] caches the per-example residual terms.
453 void CLibLinear::solve_l1r_l2_svc(
454 problem *prob_col,
double eps,
double Cp,
double Cn)
457 int w_size = prob_col->n;
459 int active_size = w_size;
460 int max_num_linesearch = 20;
463 double d, G_loss, G,
H;
467 double d_old, d_diff;
468 double loss_old=0, loss_new;
469 double appxcond, cond;
// Per-feature squared norms weighted by C, used as the curvature bound
// in the line-search upper approximation.
474 double *xj_sq =
SG_MALLOC(
double, w_size);
// Per-class cost: C[0]=Cn (label -1), C[2]=Cp (label +1); slot 1 unused.
481 double C[3] = {Cn,0,Cp};
484 if (prob_col->use_bias)
490 if(prob_col->y[j] > 0)
// Precompute xj_sq for every feature column (bias column handled as the
// implicit all-ones feature at index n).
496 for(j=0; j<w_size; j++)
502 if (use_bias && j==n)
504 for (ind=0; ind<l; ind++)
505 xj_sq[n] += C[
GETI(ind)];
510 while (x->get_next_feature(ind, val, iterator))
511 xj_sq[j] += C[
GETI(ind)]*val*val;
512 x->free_feature_iterator(iterator);
// Honor the user-configured training-time budget.
520 if (m_max_train_time > 0 && start_time.
cur_time_diff() > m_max_train_time)
// Shuffle the active coordinates for this outer iteration.
525 for(j=0; j<active_size; j++)
527 int i = j+rand()%(active_size-j);
531 for(s=0; s<active_size; s++)
// Loss gradient for coordinate j, accumulated over examples in the
// violated margin (bias column vs. regular sparse column).
537 if (use_bias && j==n)
539 for (ind=0; ind<l; ind++)
543 double tmp = C[
GETI(ind)]*y[ind];
544 G_loss -= tmp*b[ind];
551 iterator=x->get_feature_iterator(j);
553 while (x->get_next_feature(ind, val, iterator))
557 double tmp = C[
GETI(ind)]*val*y[ind];
558 G_loss -= tmp*b[ind];
562 x->free_feature_iterator(iterator);
// Optimality violation of coordinate j w.r.t. the L1 subgradient
// (Gp/Gn are G +/- 1); also used for shrinking below.
573 double violation = 0;
580 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
588 else if(w.vector[j] > 0)
589 violation = fabs(Gp);
591 violation = fabs(Gn);
// Newton direction d clipped so w_j does not cross zero.
596 if(Gp <=
H*w.vector[j])
598 else if(Gn >=
H*w.vector[j])
603 if(fabs(d) < 1.0e-12)
// First-order decrease estimate for the sufficient-decrease test.
606 double delta = fabs(w.vector[j]+d)-fabs(w.vector[j]) + G*d;
// Backtracking line search: d is halved (in omitted lines) until the
// sufficient-decrease condition holds.
609 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
612 cond = fabs(w.vector[j]+d)-fabs(w.vector[j]) - sigma*
delta;
// Cheap quadratic upper bound; if it already certifies decrease, the
// expensive exact loss recomputation below is skipped.
614 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
// Apply the step d_diff to the cached residuals b[].
617 if (use_bias && j==n)
619 for (ind=0; ind<l; ind++)
620 b[ind] += d_diff*y[ind];
625 iterator=x->get_feature_iterator(j);
626 while (x->get_next_feature(ind, val, iterator))
627 b[ind] += d_diff*val*y[ind];
629 x->free_feature_iterator(iterator);
// First line-search trial: compute both old and new loss over the
// margin-violating examples.
634 if(num_linesearch == 0)
639 if (use_bias && j==n)
641 for (ind=0; ind<l; ind++)
644 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
645 double b_new = b[ind] + d_diff*y[ind];
648 loss_new += C[
GETI(ind)]*b_new*b_new;
653 iterator=x->get_feature_iterator(j);
654 while (x->get_next_feature(ind, val, iterator))
657 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
658 double b_new = b[ind] + d_diff*val*y[ind];
661 loss_new += C[
GETI(ind)]*b_new*b_new;
663 x->free_feature_iterator(iterator);
// Subsequent trials: loss_old is already known, only loss_new needed.
669 if (use_bias && j==n)
671 for (ind=0; ind<l; ind++)
673 double b_new = b[ind] + d_diff*y[ind];
676 loss_new += C[
GETI(ind)]*b_new*b_new;
681 iterator=x->get_feature_iterator(j);
682 while (x->get_next_feature(ind, val, iterator))
684 double b_new = b[ind] + d_diff*val*y[ind];
687 loss_new += C[
GETI(ind)]*b_new*b_new;
689 x->free_feature_iterator(iterator);
693 cond = cond + loss_new - loss_old;
// Line search failed: recompute b[] from scratch to undo accumulated
// floating-point drift from the partial updates.
707 if(num_linesearch >= max_num_linesearch)
710 for(
int i=0; i<l; i++)
713 for(
int i=0; i<n; i++)
718 iterator=x->get_feature_iterator(i);
719 while (x->get_next_feature(ind, val, iterator))
720 b[ind] -= w.vector[i]*val*y[ind];
721 x->free_feature_iterator(iterator);
724 if (use_bias && w.vector[n])
726 for (ind=0; ind<l; ind++)
727 b[ind] -= w.vector[n]*y[ind];
733 Gmax_init = Gmax_new;
// Converged when the max violation drops below eps relative to the
// first iteration; if the full set was already active we are done,
// otherwise restart with all coordinates active.
739 if(Gmax_new <= eps*Gmax_init)
741 if(active_size == w_size)
745 active_size = w_size;
755 SG_INFO(
"optimization finished, #iter = %d\n", iter);
756 if(iter >= max_iterations)
757 SG_WARNING(
"\nWARNING: reaching max number of iterations\n");
// Primal objective: |w|_1 plus the C-weighted squared hinge losses.
763 for(j=0; j<w_size; j++)
767 v += fabs(w.vector[j]);
773 v += C[
GETI(j)]*b[j]*b[j];
775 SG_INFO(
"Objective value = %lf\n", v);
776 SG_INFO(
"#nonzeros/#features = %d/%d\n", nnz, w_size);
// Label +/-1 -> index {0,2} into per-class arrays (slot 1 unused).
796 #define GETI(i) (y[i]+1)
// Coordinate descent for L1-regularized logistic regression (follows
// LIBLINEAR; Yuan et al.). Works on the column-major problem; exp_wTx[]
// caches exp(w . x) per example so each coordinate update is cheap.
799 void CLibLinear::solve_l1r_lr(
800 const problem *prob_col,
double eps,
801 double Cp,
double Cn)
804 int w_size = prob_col->n;
806 int active_size = w_size;
807 int max_num_linesearch = 20;
// Accumulators for the two sides of the logistic gradient and the
// cheap upper-bound line-search conditions.
815 double sum1, appxcond1;
816 double sum2, appxcond2;
// Scratch buffers: trial exp(w.x) values and per-feature statistics
// (max |value|, total cost, and cost-weighted negative/positive sums)
// precomputed below for the approximate line-search bounds.
822 double *exp_wTx_new =
SG_MALLOC(
double, l);
823 double *xj_max =
SG_MALLOC(
double, w_size);
824 double *C_sum =
SG_MALLOC(
double, w_size);
825 double *xjneg_sum =
SG_MALLOC(
double, w_size);
826 double *xjpos_sum =
SG_MALLOC(
double, w_size);
// Per-class cost: C[0]=Cn (label -1), C[2]=Cp (label +1).
833 double C[3] = {Cn,0,Cp};
836 if (prob_col->use_bias)
842 if(prob_col->y[j] > 0)
// Precompute the per-feature statistics (bias column uses val=1).
847 for(j=0; j<w_size; j++)
858 for (ind=0; ind<l; ind++)
862 C_sum[j] += C[
GETI(ind)];
864 xjneg_sum[j] += C[
GETI(ind)];
866 xjpos_sum[j] += C[
GETI(ind)];
876 C_sum[j] += C[
GETI(ind)];
878 xjneg_sum[j] += C[
GETI(ind)]*val;
880 xjpos_sum[j] += C[
GETI(ind)]*val;
// Shuffle the active coordinates for this sweep.
894 for(j=0; j<active_size; j++)
896 int i = j+rand()%(active_size-j);
900 for(s=0; s<active_size; s++)
// Gradient/Hessian accumulation for coordinate j from the cached
// exp(w.x): tmp1 is the sigmoid factor, tmp2 its C-weighted form,
// tmp3 the complementary term (bias column vs. sparse column).
909 for (ind=0; ind<l; ind++)
911 double exp_wTxind = exp_wTx[ind];
912 double tmp1 = 1.0/(1+exp_wTxind);
913 double tmp2 = C[
GETI(ind)]*tmp1;
914 double tmp3 = tmp2*exp_wTxind;
925 double exp_wTxind = exp_wTx[ind];
926 double tmp1 = val/(1+exp_wTxind);
927 double tmp2 = C[
GETI(ind)]*tmp1;
928 double tmp3 = tmp2*exp_wTxind;
936 G = -sum2 + xjneg_sum[j];
// L1 optimality violation (Gp/Gn = G +/- 1), also drives shrinking.
940 double violation = 0;
947 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
956 violation = fabs(Gp);
958 violation = fabs(Gn);
970 if(fabs(d) < 1.0e-12)
// Backtracking line search; d is halved (in omitted lines) until one
// of the decrease conditions holds.
977 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
// Cheap log-sum upper bounds on the logistic loss change; if either
// certifies sufficient decrease, the exact recomputation is skipped.
// NOTE(review): divides by xj_max[j] and C_sum[j] — presumably
// guarded against zero in omitted lines; confirm upstream.
983 double tmp = exp(d*xj_max[j]);
984 appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
985 appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
// Accept via the cheap bound: update the exp(w.x) cache in place.
990 for (ind=0; ind<l; ind++)
991 exp_wTx[ind] *= exp(d);
998 exp_wTx[ind] *= exp(d*val);
1005 cond += d*xjneg_sum[j];
// Exact condition: evaluate trial exp values into exp_wTx_new and the
// exact logistic loss change into cond.
// NOTE(review): loop index `ind` vs. counter `i` in exp_wTx_new[i] —
// `i` is a separate running counter incremented in omitted lines.
1011 for (ind=0; ind<l; ind++)
1013 double exp_dx = exp(d);
1014 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1015 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1025 double exp_dx = exp(d*val);
1026 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1027 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
// Step accepted: commit the trial cache.
1038 for (ind=0; ind<l; ind++)
1040 exp_wTx[ind] = exp_wTx_new[i];
1049 exp_wTx[ind] = exp_wTx_new[i];
// Line search failed: rebuild exp_wTx from w from scratch to remove
// accumulated floating-point drift.
1066 if(num_linesearch >= max_num_linesearch)
1069 for(
int i=0; i<l; i++)
1072 for(
int i=0; i<w_size; i++)
1078 for (ind=0; ind<l; ind++)
1085 exp_wTx[ind] +=
w.
vector[i]*val;
1090 for(
int i=0; i<l; i++)
1091 exp_wTx[i] = exp(exp_wTx[i]);
1096 Gmax_init = Gmax_new;
// Converged when max violation <= eps * initial violation; restart
// with the full active set once if shrinking was in effect.
1100 if(Gmax_new <= eps*Gmax_init)
1102 if(active_size == w_size)
1106 active_size = w_size;
1112 Gmax_old = Gmax_new;
1116 SG_INFO(
"optimization finished, #iter = %d\n", iter);
1118 SG_WARNING(
"\nWARNING: reaching max number of iterations\n");
// Primal objective: |w|_1 (omitted lines) plus the C-weighted logistic
// losses, split by label sign.
1124 for(j=0; j<w_size; j++)
1132 v += C[
GETI(j)]*log(1+1/exp_wTx[j]);
1134 v += C[
GETI(j)]*log(1+exp_wTx[j]);
1136 SG_INFO(
"Objective value = %lf\n", v);
1137 SG_INFO(
"#nonzeros/#features = %d/%d\n", nnz, w_size);
// Label +/-1 -> index {0,2} into per-class arrays (slot 1 unused).
1168 #define GETI(i) (y[i]+1)
// Dual coordinate descent for L2-regularized logistic regression
// (follows LIBLINEAR; Yu et al.): each coordinate subproblem in the pair
// (alpha_i, C - alpha_i) is solved by a safeguarded 1-D Newton method.
1171 void CLibLinear::solve_l2r_lr_dual(
const problem *prob,
double eps,
double Cp,
double Cn)
1174 int w_size = prob->n;
// NOTE(review): raw new[] here, while the other solvers in this file
// use SG_MALLOC — inconsistent; confirm these are released with the
// matching delete[] in omitted lines.
1176 double *xTx =
new double[l];
1177 int max_iter = 1000;
1178 int *index =
new int[l];
// Two dual variables per example: alpha[2i] and its complement
// alpha[2i+1] = C - alpha[2i].
1179 double *alpha =
new double[2*l];
// Inner Newton iteration budget and its (adaptively tightened)
// stopping tolerance.
1181 int max_inner_iter = 100;
1182 double innereps = 1e-2;
// Per-class cost: upper_bound[0]=Cn (label -1), [2]=Cp (label +1).
1184 double upper_bound[3] = {Cn, 0, Cp};
1185 double Gmax_init = 0;
1205 alpha[2*i+1] = upper_bound[
GETI(i)] - alpha[2*i];
1208 for(i=0; i<w_size; i++)
// Precompute x_i . x_i and build w from the initial alphas.
1212 xTx[i] = prob->x->
dot(i, prob->x,i);
1213 prob->x->add_to_dense_vec(y[i]*alpha[2*i], i,
w.
vector, w_size);
// Bias term stored as the extra component w[w_size].
1217 w.
vector[w_size]+=y[i]*alpha[2*i];
1223 while (iter < max_iter)
// Fisher-Yates shuffle of the example order for this outer pass.
1227 int j = i+rand()%(l-i);
1230 int newton_iter = 0;
1236 double C = upper_bound[
GETI(i)];
1237 double ywTx = 0, xisq = xTx[i];
1239 ywTx = prob->x->dense_dot(i,
w.
vector, w_size);
1244 double a = xisq, b = ywTx;
// Pick which of the dual pair to update; sign flips the subproblem
// when the complementary variable is the better coordinate.
1247 int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1248 if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1256 double alpha_old = alpha[ind1];
1257 double z = alpha_old;
// Gradient of the 1-D dual subproblem g(z); the log barrier keeps
// z strictly inside (0, C).
1260 double gp = a*(z-alpha_old)+sign*b+
CMath::log(z/(C-z));
// Damping factor for the safeguarded Newton step.
1264 const double eta = 0.1;
1266 while (inner_iter <= max_inner_iter)
1268 if(fabs(gp) < innereps)
// Second derivative of the subproblem; Newton step z - gp/gpp.
1270 double gpp = a + C/(C-z)/z;
1271 double tmpz = z - gp/gpp;
1276 gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
// Commit the accepted step to w (and the bias component).
1286 prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i,
w.
vector, w_size);
1289 w.
vector[w_size]+=sign*(z-alpha_old)*yi;
// Tighten the inner tolerance once Newton iterations become cheap.
1302 if(newton_iter <= l/10)
1303 innereps =
CMath::max(innereps_min, 0.1*innereps);
1308 SG_INFO(
"optimization finished, #iter = %d\n",iter);
1309 if (iter >= max_iter)
1310 SG_WARNING(
"reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
// Dual objective: w.w (omitted lines) plus the entropy-like terms of
// the paired alphas minus the constant C*log(C) contribution.
1315 for(i=0; i<w_size; i++)
1319 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1320 - upper_bound[
GETI(i)] * log(upper_bound[
GETI(i)]);
1321 SG_INFO(
"Objective value = %lf\n", v);
// Fragments of the linear-term accessors: precondition checks that
// labels exist and that the supplied linear term matches them in length.
1333 SG_ERROR(
"Please assign labels first!\n");
// The linear term must provide one entry per training label.
1337 if (num_labels!=linear_term.
vlen)
1339 SG_ERROR(
"Number of labels (%d) does not match number"
1340 " of entries (%d) in linear term \n", num_labels,
// Getter-side checks: both the linear term and labels must be set.
1350 SG_ERROR(
"Please assign linear term first!\n");
1358 SG_ERROR(
"Please assign labels first!\n");
1364 #endif //HAVE_LAPACK