22 using namespace shogun;
51 void CLibLinear::init()
85 SG_ERROR(
"Specified features are not of type CDotFeatures\n")
99 if (num_feat!=num_train_labels)
101 SG_ERROR(
"L1 methods require the data to be transposed: "
102 "number of features %d does not match number of "
103 "training labels %d\n",
104 num_feat, num_train_labels);
111 if (num_vec!=num_train_labels)
113 SG_ERROR(
"number of vectors %d does not match "
114 "number of training labels %d\n",
115 num_vec, num_train_labels);
123 liblinear_problem prob;
136 prob.y=SG_MALLOC(
double, prob.l);
142 for (int32_t i=0; i<prob.l; i++)
147 else if (prob.y[i] == -1)
150 SG_ERROR(
"labels should be +1/-1 only\n")
155 for(
int i=0;i<prob.l;i++)
162 SG_INFO(
"%d training points %d dims\n", prob.l, prob.n)
164 function *fun_obj=NULL;
169 fun_obj=
new l2r_lr_fun(&prob, Cs);
171 SG_DEBUG(
"starting L2R_LR training via tron\n")
179 fun_obj=
new l2r_l2_svc_fun(&prob, Cs);
205 solve_l2r_lr_dual(&prob,
epsilon, Cp, Cn);
209 SG_ERROR(
"Error: unknown solver_type\n")
249 #define GETI(i) (y[i]+1)
252 void CLibLinear::solve_l2r_l1l2_svc(
256 int w_size = prob->n;
259 double *QD = SG_MALLOC(
double, l);
260 int *index = SG_MALLOC(
int, l);
261 double *alpha = SG_MALLOC(
double, l);
262 int32_t *y = SG_MALLOC(int32_t, l);
269 double PGmax_new, PGmin_new;
272 double diag[3] = {0.5/Cn, 0, 0.5/Cp};
287 for(i=0; i<w_size; i++)
301 QD[i] = diag[
GETI(i)];
303 QD[i] += prob->x->
dot(i, prob->x,i);
317 for (i=0; i<active_size; i++)
323 for (s=0;s<active_size;s++)
328 G = prob->x->dense_dot(i,
w.
vector, n);
337 C = upper_bound[
GETI(i)];
338 G += alpha[i]*diag[
GETI(i)];
353 else if (alpha[i] == C)
371 if(fabs(PG) > 1.0e-12)
373 double alpha_old = alpha[i];
375 d = (alpha[i] - alpha_old)*yi;
377 prob->x->add_to_dense_vec(d, i,
w.
vector, n);
400 PGmax_old = PGmax_new;
401 PGmin_old = PGmin_new;
409 SG_INFO("optimization finished,
#iter = %d\n",iter)
412 SG_WARNING(
"reaching max number of iterations\nUsing -s 2 may be faster"
413 "(also see liblinear FAQ)\n\n");
420 for(i=0; i<w_size; i++)
424 v += alpha[i]*(alpha[i]*diag[
GETI(i)] - 2);
428 SG_INFO(
"Objective value = %lf\n",v/2)
449 #define GETI(i) (y[i]+1)
452 void CLibLinear::solve_l1r_l2_svc(
453 liblinear_problem *prob_col,
double eps,
double Cp,
double Cn)
456 int w_size = prob_col->n;
458 int active_size = w_size;
459 int max_num_linesearch = 20;
462 double d, G_loss, G,
H;
466 double d_old, d_diff;
467 double loss_old=0, loss_new;
468 double appxcond, cond;
470 int *index = SG_MALLOC(
int, w_size);
471 int32_t *y = SG_MALLOC(int32_t, l);
472 double *b = SG_MALLOC(
double, l);
473 double *xj_sq = SG_MALLOC(
double, w_size);
480 double C[3] = {Cn,0,Cp};
483 if (prob_col->use_bias)
489 if(prob_col->y[j] > 0)
495 for(j=0; j<w_size; j++)
503 for (ind=0; ind<l; ind++)
504 xj_sq[n] += C[
GETI(ind)];
508 iterator=x->get_feature_iterator(j);
509 while (x->get_next_feature(ind, val, iterator))
510 xj_sq[j] += C[
GETI(ind)]*val*val;
511 x->free_feature_iterator(iterator);
524 for(j=0; j<active_size; j++)
530 for(s=0; s<active_size; s++)
538 for (ind=0; ind<l; ind++)
542 double tmp = C[
GETI(ind)]*y[ind];
543 G_loss -= tmp*b[ind];
550 iterator=x->get_feature_iterator(j);
552 while (x->get_next_feature(ind, val, iterator))
556 double tmp = C[
GETI(ind)]*val*y[ind];
557 G_loss -= tmp*b[ind];
561 x->free_feature_iterator(iterator);
572 double violation = 0;
579 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
588 violation = fabs(Gp);
590 violation = fabs(Gn);
602 if(fabs(d) < 1.0e-12)
608 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
613 appxcond = xj_sq[j]*d*d + G_loss*d + cond;
618 for (ind=0; ind<l; ind++)
619 b[ind] += d_diff*y[ind];
624 iterator=x->get_feature_iterator(j);
625 while (x->get_next_feature(ind, val, iterator))
626 b[ind] += d_diff*val*y[ind];
628 x->free_feature_iterator(iterator);
633 if(num_linesearch == 0)
640 for (ind=0; ind<l; ind++)
643 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
644 double b_new = b[ind] + d_diff*y[ind];
647 loss_new += C[
GETI(ind)]*b_new*b_new;
652 iterator=x->get_feature_iterator(j);
653 while (x->get_next_feature(ind, val, iterator))
656 loss_old += C[
GETI(ind)]*b[ind]*b[ind];
657 double b_new = b[ind] + d_diff*val*y[ind];
660 loss_new += C[
GETI(ind)]*b_new*b_new;
662 x->free_feature_iterator(iterator);
670 for (ind=0; ind<l; ind++)
672 double b_new = b[ind] + d_diff*y[ind];
675 loss_new += C[
GETI(ind)]*b_new*b_new;
680 iterator=x->get_feature_iterator(j);
681 while (x->get_next_feature(ind, val, iterator))
683 double b_new = b[ind] + d_diff*val*y[ind];
686 loss_new += C[
GETI(ind)]*b_new*b_new;
688 x->free_feature_iterator(iterator);
692 cond = cond + loss_new - loss_old;
706 if(num_linesearch >= max_num_linesearch)
709 for(
int i=0; i<l; i++)
712 for(
int i=0; i<n; i++)
717 iterator=x->get_feature_iterator(i);
718 while (x->get_next_feature(ind, val, iterator))
719 b[ind] -=
w.
vector[i]*val*y[ind];
720 x->free_feature_iterator(iterator);
725 for (ind=0; ind<l; ind++)
732 Gmax_init = Gmax_new;
738 if(Gmax_new <= eps*Gmax_init)
740 if(active_size == w_size)
744 active_size = w_size;
754 SG_INFO("optimization finished,
#iter = %d\n", iter)
756 SG_WARNING(
"\nWARNING: reaching max number of iterations\n")
762 for(j=0; j<w_size; j++)
772 v += C[
GETI(j)]*b[j]*b[j];
774 SG_INFO(
"Objective value = %lf\n", v)
775 SG_INFO("
#nonzeros/#features = %d/%d\n", nnz, w_size)
795 #define GETI(i) (y[i]+1)
798 void CLibLinear::solve_l1r_lr(
799 const liblinear_problem *prob_col,
double eps,
800 double Cp,
double Cn)
803 int w_size = prob_col->n;
805 int active_size = w_size;
806 int max_num_linesearch = 20;
814 double sum1, appxcond1;
815 double sum2, appxcond2;
818 int *index = SG_MALLOC(
int, w_size);
819 int32_t *y = SG_MALLOC(int32_t, l);
820 double *exp_wTx = SG_MALLOC(
double, l);
821 double *exp_wTx_new = SG_MALLOC(
double, l);
822 double *xj_max = SG_MALLOC(
double, w_size);
823 double *C_sum = SG_MALLOC(
double, w_size);
824 double *xjneg_sum = SG_MALLOC(
double, w_size);
825 double *xjpos_sum = SG_MALLOC(
double, w_size);
832 double C[3] = {Cn,0,Cp};
835 if (prob_col->use_bias)
841 if(prob_col->y[j] > 0)
846 for(j=0; j<w_size; j++)
857 for (ind=0; ind<l; ind++)
861 C_sum[j] += C[
GETI(ind)];
863 xjneg_sum[j] += C[
GETI(ind)];
865 xjpos_sum[j] += C[
GETI(ind)];
875 C_sum[j] += C[
GETI(ind)];
877 xjneg_sum[j] += C[
GETI(ind)]*val;
879 xjpos_sum[j] += C[
GETI(ind)]*val;
893 for(j=0; j<active_size; j++)
899 for(s=0; s<active_size; s++)
908 for (ind=0; ind<l; ind++)
910 double exp_wTxind = exp_wTx[ind];
911 double tmp1 = 1.0/(1+exp_wTxind);
912 double tmp2 = C[
GETI(ind)]*tmp1;
913 double tmp3 = tmp2*exp_wTxind;
924 double exp_wTxind = exp_wTx[ind];
925 double tmp1 = val/(1+exp_wTxind);
926 double tmp2 = C[
GETI(ind)]*tmp1;
927 double tmp3 = tmp2*exp_wTxind;
935 G = -sum2 + xjneg_sum[j];
939 double violation = 0;
946 else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
955 violation = fabs(Gp);
957 violation = fabs(Gn);
969 if(fabs(d) < 1.0e-12)
976 for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
982 double tmp = exp(d*xj_max[j]);
983 appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
984 appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
989 for (ind=0; ind<l; ind++)
990 exp_wTx[ind] *= exp(d);
997 exp_wTx[ind] *= exp(d*val);
1004 cond += d*xjneg_sum[j];
1010 for (ind=0; ind<l; ind++)
1012 double exp_dx = exp(d);
1013 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1014 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1024 double exp_dx = exp(d*val);
1025 exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
1026 cond += C[
GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
1037 for (ind=0; ind<l; ind++)
1039 exp_wTx[ind] = exp_wTx_new[i];
1048 exp_wTx[ind] = exp_wTx_new[i];
1065 if(num_linesearch >= max_num_linesearch)
1068 for(
int i=0; i<l; i++)
1071 for(
int i=0; i<w_size; i++)
1077 for (ind=0; ind<l; ind++)
1084 exp_wTx[ind] +=
w.
vector[i]*val;
1089 for(
int i=0; i<l; i++)
1090 exp_wTx[i] = exp(exp_wTx[i]);
1095 Gmax_init = Gmax_new;
1099 if(Gmax_new <= eps*Gmax_init)
1101 if(active_size == w_size)
1105 active_size = w_size;
1111 Gmax_old = Gmax_new;
1115 SG_INFO("optimization finished,
#iter = %d\n", iter)
1117 SG_WARNING(
"\nWARNING: reaching max number of iterations\n")
1123 for(j=0; j<w_size; j++)
1124 if(
w.vector[j] != 0)
1131 v += C[
GETI(j)]*log(1+1/exp_wTx[j]);
1133 v += C[
GETI(j)]*log(1+exp_wTx[j]);
1135 SG_INFO(
"Objective value = %lf\n", v)
1136 SG_INFO("
#nonzeros/#features = %d/%d\n", nnz, w_size)
1141 SG_FREE(exp_wTx_new);
1167 #define GETI(i) (y[i]+1)
1170 void CLibLinear::solve_l2r_lr_dual(
const liblinear_problem *prob,
double eps,
double Cp,
double Cn)
1173 int w_size = prob->n;
1175 double *xTx =
new double[l];
1176 int max_iter = 1000;
1177 int *index =
new int[l];
1178 double *alpha =
new double[2*l];
1179 int32_t *y = SG_MALLOC(int32_t, l);
1180 int max_inner_iter = 100;
1181 double innereps = 1e-2;
1183 double upper_bound[3] = {Cn, 0, Cp};
1184 double Gmax_init = 0;
1204 alpha[2*i+1] = upper_bound[
GETI(i)] - alpha[2*i];
1207 for(i=0; i<w_size; i++)
1211 xTx[i] = prob->x->
dot(i, prob->x,i);
1212 prob->x->add_to_dense_vec(y[i]*alpha[2*i], i,
w.
vector, w_size);
1216 w.
vector[w_size]+=y[i]*alpha[2*i];
1222 while (iter < max_iter)
1229 int newton_iter = 0;
1235 double C = upper_bound[
GETI(i)];
1236 double ywTx = 0, xisq = xTx[i];
1238 ywTx = prob->x->dense_dot(i,
w.
vector, w_size);
1243 double a = xisq, b = ywTx;
1246 int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
1247 if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
1255 double alpha_old = alpha[ind1];
1256 double z = alpha_old;
1259 double gp = a*(z-alpha_old)+sign*b+
CMath::log(z/(C-z));
1263 const double eta = 0.1;
1265 while (inner_iter <= max_inner_iter)
1267 if(fabs(gp) < innereps)
1269 double gpp = a + C/(C-z)/z;
1270 double tmpz = z - gp/gpp;
1275 gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
1285 prob->x->add_to_dense_vec(sign*(z-alpha_old)*yi, i,
w.
vector, w_size);
1288 w.
vector[w_size]+=sign*(z-alpha_old)*yi;
1301 if(newton_iter <= l/10)
1302 innereps =
CMath::max(innereps_min, 0.1*innereps);
1307 SG_INFO(
"optimization finished, #iter = %d\n",iter)
1308 if (iter >= max_iter)
1309 SG_WARNING(
"reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n")
1314 for(i=0; i<w_size; i++)
1318 v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
1319 - upper_bound[
GETI(i)] * log(upper_bound[
GETI(i)]);
1320 SG_INFO(
"Objective value = %lf\n", v)
1332 SG_ERROR(
"Please assign labels first!\n")
1336 if (num_labels!=linear_term.
vlen)
1338 SG_ERROR(
"Number of labels (%d) does not match number"
1339 " of entries (%d) in linear term \n", num_labels,
1349 SG_ERROR(
"Please assign linear term first!\n")
1357 SG_ERROR(
"Please assign labels first!\n")