19 using namespace shogun;
22 mkl_block_norm(1),beta_local(NULL), mkl_iterations(0), mkl_epsilon(1e-5),
23 interleaved_optimization(true), w_gap(1.0), rho(0)
35 SG_DEBUG(
"creating MKL object %p\n",
this);
44 SG_DEBUG(
"deleting MKL object %p\n",
this);
72 SG_INFO(
"trying to initialize CPLEX\n") ;
75 env = CPXopenCPLEX (&status);
80 SG_WARNING(
"Could not open CPLEX environment.\n");
81 CPXgeterrorstring (
env, status, errmsg);
89 status = CPXsetintparam (
env, CPX_PARAM_LPMETHOD, CPX_ALG_DUAL);
92 SG_ERROR(
"Failure to select dual lp optimization, error %d.\n", status);
96 status = CPXsetintparam (
env, CPX_PARAM_DATACHECK, CPX_ON);
99 SG_ERROR(
"Failure to turn on data checking, error %d.\n", status);
106 SG_ERROR(
"Failed to create LP.\n");
128 SG_WARNING(
"CPXfreeprob failed, error code %d.\n", status);
135 int32_t status = CPXcloseCPLEX (&
env);
141 SG_WARNING(
"Could not close CPLEX environment.\n");
142 CPXgeterrorstring (
env, status, errmsg);
156 lpx_set_obj_dir(
lp_glpk, LPX_MIN);
157 lpx_set_int_parm(
lp_glpk, LPX_K_DUAL, GLP_ON );
158 lpx_set_int_parm(
lp_glpk, LPX_K_PRESOL, GLP_ON );
160 glp_term_out(GLP_OFF);
175 int status = lpx_get_status(lp);
177 if (status==LPX_INFEAS)
179 SG_PRINT(
"solution is infeasible!\n");
182 else if(status==LPX_NOFEAS)
184 SG_PRINT(
"problem has no feasible solution!\n");
200 SG_ERROR(
"%s::train_machine(): Number of training vectors (%d) does"
201 " not match number of labels (%d)\n",
get_name(),
209 SG_ERROR(
"No constraint generator (SVM) set\n");
226 int32_t num_weights = -1;
228 SG_INFO(
"num_kernels = %d\n", num_kernels);
231 ASSERT(num_weights==num_kernels);
294 SG_ERROR(
"Interleaved MKL optimization is currently "
295 "only supported with SVMlight\n");
308 #ifdef USE_REFERENCE_COUNTING
309 int32_t refs=this->ref();
315 #ifdef USE_REFERENCE_COUNTING
335 SG_SWARNING(
"MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time ()= %f . It may have not converged yet!\n",
get_max_train_time ());
358 for (int32_t i=0; i<nsv; i++)
371 SG_ERROR(
"Norm must be >= 1, e.g., 1-norm is the standard MKL; norms>1 nonsparse MKL\n");
378 if (lambda>1 || lambda<0)
383 else if (lambda==1.0)
402 SG_SWARNING(
"MKL Algorithm terminates PREMATURELY due to current training time exceeding get_max_train_time ()= %f . It may have not converged yet!\n",
get_max_train_time ());
409 ASSERT(nweights==num_kernels);
412 int32_t inner_iters=0;
416 for (int32_t i=0; i<num_kernels; i++)
419 mkl_objective+=old_beta[i]*sumw[i];
453 SG_ERROR(
"Solver type not supported (not compiled in?)\n");
474 int32_t nofKernelsGood;
477 nofKernelsGood = num_kernels;
480 for (p=0; p<num_kernels; ++p )
482 if (sumw[p] >= 0.0 && old_beta[p] >= 0.0 )
484 beta[p] =
CMath::sqrt(sumw[p]*old_beta[p]*old_beta[p]);
501 for( p=0; p<num_kernels; ++p )
506 SG_PRINT(
"MKL-direct: p = %.3f\n", 1.0 );
507 SG_PRINT(
"MKL-direct: nofKernelsGood = %d\n", nofKernelsGood );
508 SG_PRINT(
"MKL-direct: Z = %e\n", Z );
509 SG_PRINT(
"MKL-direct: eps = %e\n", epsRegul );
510 for( p=0; p<num_kernels; ++p )
515 SG_PRINT(
"MKL-direct: preR = %e\n", preR );
516 SG_PRINT(
"MKL-direct: preR/p = %e\n", preR );
518 SG_PRINT(
"MKL-direct: R = %e\n", R );
519 SG_ERROR(
"Assertion R >= 0 failed!\n" );
523 for( p=0; p<num_kernels; ++p )
531 for( p=0; p<num_kernels; ++p )
540 for( p=0; p<num_kernels; ++p )
548 for (p=0; p<num_kernels; ++p )
551 obj += sumw[p] * beta[p];
560 std::list<int32_t> I;
562 for (int32_t i=0; i<len;i++)
572 for (std::list<int32_t>::iterator it=I.begin(); it!=I.end(); it++)
600 for (int32_t i=0; i<n; i++)
604 for (int32_t j=0; j<n; j++)
641 }
while(ff>ff_old+1e-4*gg_old*(del-del_old));
653 SG_ERROR(
"cannot compute objective, labels or kernel not set\n");
668 for( p=0; p<num_kernels; ++p )
682 for( p=0; p<num_kernels; ++p )
687 for( p=0; p<num_kernels; ++p )
688 obj += sumw[p] * beta[p];
704 int32_t nofKernelsGood;
707 nofKernelsGood = num_kernels;
708 for( p=0; p<num_kernels; ++p )
711 if( sumw[p] >= 0.0 && old_beta[p] >= 0.0 )
713 beta[p] = sumw[p] * old_beta[p]*old_beta[p] /
mkl_norm;
726 for( p=0; p<num_kernels; ++p )
731 for( p=0; p<num_kernels; ++p )
736 for( p=0; p<num_kernels; ++p )
737 preR +=
CMath::sq( old_beta[p] - beta[p]);
743 SG_PRINT(
"MKL-direct: nofKernelsGood = %d\n", nofKernelsGood );
744 SG_PRINT(
"MKL-direct: Z = %e\n", Z );
745 SG_PRINT(
"MKL-direct: eps = %e\n", epsRegul );
746 for( p=0; p<num_kernels; ++p )
749 SG_PRINT(
"MKL-direct: t[%3d] = %e ( diff = %e = %e - %e )\n", p, t, old_beta[p]-beta[p], old_beta[p], beta[p] );
751 SG_PRINT(
"MKL-direct: preR = %e\n", preR );
754 SG_PRINT(
"MKL-direct: R = %e\n", R );
755 SG_ERROR(
"Assertion R >= 0 failed!\n" );
759 for( p=0; p<num_kernels; ++p )
767 for( p=0; p<num_kernels; ++p )
777 for( p=0; p<num_kernels; ++p )
778 obj += sumw[p] * beta[p];
784 const float64_t* old_beta, int32_t num_kernels,
791 SG_ERROR(
"MKL via NEWTON works only for norms>1\n");
793 const double epsBeta = 1e-32;
794 const double epsGamma = 1e-12;
795 const double epsWsq = 1e-12;
796 const double epsNewt = 0.0001;
797 const double epsStep = 1e-9;
798 const int nofNewtonSteps = 3;
799 const double hessRidge = 1e-6;
800 const int inLogSpace = 0;
815 for( p=0; p<num_kernels; ++p )
817 beta[p] = old_beta[p];
818 if( !( beta[p] >= epsBeta ) )
821 ASSERT( 0.0 <= beta[p] && beta[p] <= 1.0 );
826 if( !( fabs(Z-1.0) <= epsGamma ) )
828 SG_WARNING(
"old_beta not normalized (diff=%e); forcing normalization. ", Z-1.0 );
829 for( p=0; p<num_kernels; ++p )
834 ASSERT( 0.0 <= beta[p] && beta[p] <= 1.0 );
840 for ( p=0; p<num_kernels; ++p )
842 if ( !( sumw[p] >= 0 ) )
844 if( !( sumw[p] >= -epsWsq ) )
845 SG_WARNING(
"sumw[%d] = %e; treated as 0. ", p, sumw[p] );
857 if( !( gamma > epsGamma ) )
859 SG_WARNING(
"bad gamma: %e; set to %e. ", gamma, epsGamma );
863 ASSERT( gamma >= epsGamma );
868 for( p=0; p<num_kernels; ++p )
870 obj += beta[p] * sumw[p];
873 if( !( obj >= 0.0 ) )
874 SG_WARNING(
"negative objective: %e. ", obj );
879 for (i = 0; i < nofNewtonSteps; ++i )
884 for( p=0; p<num_kernels; ++p )
886 ASSERT( 0.0 <= beta[p] && beta[p] <= 1.0 );
890 const float halfw2p = ( sumw[p] >= 0.0 ) ? (sumw[p]*old_beta[p]*old_beta[p]) : 0.0;
892 const float64_t t1 = ( t0 < 0 ) ? 0.0 : t0;
895 newtDir[p] = t1 / ( t1 + t2*beta[p] + hessRidge );
897 newtDir[p] = ( t1 == 0.0 ) ? 0.0 : ( t1 / t2 );
899 ASSERT( newtDir[p] == newtDir[p] );
907 while( stepSize >= epsStep )
913 for( p=0; p<num_kernels; ++p )
916 newtBeta[p] = beta[p] *
CMath::exp( + stepSize * newtDir[p] );
918 newtBeta[p] = beta[p] + stepSize * newtDir[p];
919 if( !( newtBeta[p] >= epsBeta ) )
920 newtBeta[p] = epsBeta;
932 for( p=0; p<num_kernels; ++p )
935 if( newtBeta[p] > 1.0 )
940 ASSERT( 0.0 <= newtBeta[p] && newtBeta[p] <= 1.0 );
946 for( p=0; p<num_kernels; ++p )
947 newtObj += sumw[p] * old_beta[p]*old_beta[p] / newtBeta[p];
949 if ( newtObj < obj - epsNewt*stepSize*obj )
951 for( p=0; p<num_kernels; ++p )
952 beta[p] = newtBeta[p];
960 if( stepSize < epsStep )
968 for( p=0; p<num_kernels; ++p )
969 obj += beta[p] * sumw[p];
984 int32_t NUMCOLS = 2*num_kernels + 1;
995 for (int32_t i=0; i<2*num_kernels; i++)
1002 for (int32_t i=num_kernels; i<2*num_kernels; i++)
1005 obj[2*num_kernels]=1 ;
1006 lb[2*num_kernels]=-CPX_INFBOUND ;
1007 ub[2*num_kernels]=CPX_INFBOUND ;
1009 int status = CPXnewcols (
env,
lp_cplex, NUMCOLS, obj, lb, ub, NULL, NULL);
1012 CPXgeterrorstring (
env, status, errmsg);
1017 SG_INFO(
"adding the first row\n");
1018 int initial_rmatbeg[1];
1019 int initial_rmatind[num_kernels+1];
1020 double initial_rmatval[num_kernels+1];
1021 double initial_rhs[1];
1022 char initial_sense[1];
1027 initial_rmatbeg[0] = 0;
1029 initial_sense[0]=
'E' ;
1032 for (int32_t i=0; i<num_kernels; i++)
1034 initial_rmatind[i]=i ;
1035 initial_rmatval[i]=1 ;
1037 initial_rmatind[num_kernels]=2*num_kernels ;
1038 initial_rmatval[num_kernels]=0 ;
1040 status = CPXaddrows (
env,
lp_cplex, 0, 1, num_kernels+1,
1041 initial_rhs, initial_sense, initial_rmatbeg,
1042 initial_rmatind, initial_rmatval, NULL, NULL);
1047 initial_rmatbeg[0] = 0;
1049 initial_sense[0]=
'L' ;
1051 initial_rmatind[0]=2*num_kernels ;
1052 initial_rmatval[0]=0 ;
1055 initial_rhs, initial_sense, initial_rmatbeg,
1056 initial_rmatind, initial_rmatval, NULL, NULL);
1061 for (int32_t i=0; i<num_kernels; i++)
1063 initial_rmatind[i]=i ;
1064 initial_rmatval[i]=1 ;
1066 initial_rmatind[num_kernels]=2*num_kernels ;
1067 initial_rmatval[num_kernels]=0 ;
1069 status = CPXaddqconstr (
env,
lp_cplex, 0, num_kernels+1, 1.0,
'L', NULL, NULL,
1070 initial_rmatind, initial_rmatind, initial_rmatval, NULL);
1076 SG_ERROR(
"Failed to add the first row.\n");
1082 for (int32_t q=0; q<num_kernels-1; q++)
1099 rmatind[2]=num_kernels+q ;
1102 rhs, sense, rmatbeg,
1103 rmatind, rmatval, NULL, NULL);
1105 SG_ERROR(
"Failed to add a smothness row (1).\n");
1114 rmatind[2]=num_kernels+q ;
1117 rhs, sense, rmatbeg,
1118 rmatind, rmatval, NULL, NULL);
1120 SG_ERROR(
"Failed to add a smothness row (2).\n");
1129 int rmatind[num_kernels+1];
1130 double rmatval[num_kernels+1];
1142 for (int32_t i=0; i<num_kernels; i++)
1146 rmatval[i]=-(sumw[i]-suma) ;
1148 rmatval[i]=-sumw[i];
1150 rmatind[num_kernels]=2*num_kernels ;
1151 rmatval[num_kernels]=-1 ;
1153 int32_t status = CPXaddrows (
env,
lp_cplex, 0, 1, num_kernels+1,
1154 rhs, sense, rmatbeg,
1155 rmatind, rmatval, NULL, NULL);
1157 SG_ERROR(
"Failed to add the new row.\n");
1172 for (int32_t i=0; i<num_kernels; i++)
1173 beta[i]=old_beta[i];
1174 for (int32_t i=num_kernels; i<2*num_kernels+1; i++)
1182 CMath::scale_vector(1/CMath::qnorm(beta, num_kernels,
mkl_norm), beta, num_kernels);
1188 SG_ERROR(
"Failed to optimize Problem.\n");
1192 status=CPXsolution(
env,
lp_cplex, &solstat, &objval,
1193 (
double*) beta, NULL, NULL, NULL);
1197 CMath::display_vector(beta, num_kernels,
"beta");
1198 SG_ERROR(
"Failed to obtain solution.\n");
1201 CMath::scale_vector(1/CMath::qnorm(beta, num_kernels,
mkl_norm), beta, num_kernels);
1215 SG_ERROR(
"Failed to optimize Problem.\n");
1218 int32_t cur_numrows=(int32_t) CPXgetnumrows(
env,
lp_cplex);
1219 int32_t cur_numcols=(int32_t) CPXgetnumcols(
env,
lp_cplex);
1220 int32_t num_rows=cur_numrows;
1221 ASSERT(cur_numcols<=2*num_kernels+1);
1232 status=CPXsolution(
env,
lp_cplex, &solstat, &objval,
1233 (
double*) x, (
double*) pi, (
double*) slack, NULL);
1237 status=CPXsolution(
env,
lp_cplex, &solstat, &objval,
1238 (
double*) x, NULL, (
double*) slack, NULL);
1241 int32_t solution_ok = (!status) ;
1243 SG_ERROR(
"Failed to obtain solution.\n");
1245 int32_t num_active_rows=0 ;
1250 int32_t max_idx = -1 ;
1251 int32_t start_row = 1 ;
1253 start_row+=2*(num_kernels-1);
1255 for (int32_t i = start_row; i < cur_numrows; i++)
1263 if (slack[i]>max_slack)
1265 max_slack=slack[i] ;
1276 if (slack[i]>max_slack)
1278 max_slack=slack[i] ;
1286 if ( (num_rows-start_row>
CMath::max(100,2*num_active_rows)) && (max_idx!=-1))
1289 status = CPXdelrows (
env,
lp_cplex, max_idx, max_idx) ;
1291 SG_ERROR(
"Failed to remove an old row.\n");
1296 rho = -x[2*num_kernels] ;
1308 for (int32_t i=0; i<num_kernels; i++)
1313 SG_ERROR(
"Cplex not enabled at compile time\n");
1324 SG_ERROR(
"MKL via GLPK works only for norm=1\n");
1328 int32_t NUMCOLS = 2*num_kernels + 1 ;
1334 lpx_add_cols(
lp_glpk, NUMCOLS);
1335 for (
int i=1; i<=2*num_kernels; i++)
1337 lpx_set_obj_coef(
lp_glpk, i, 0);
1338 lpx_set_col_bnds(
lp_glpk, i, LPX_DB, 0, 1);
1340 for (
int i=num_kernels+1; i<=2*num_kernels; i++)
1344 lpx_set_obj_coef(
lp_glpk, NUMCOLS, 1);
1345 lpx_set_col_bnds(
lp_glpk, NUMCOLS, LPX_FR, 0,0);
1348 int row_index = lpx_add_rows(
lp_glpk, 1);
1349 int* ind =
SG_MALLOC(
int, num_kernels+2);
1351 for (
int i=1; i<=num_kernels; i++)
1356 ind[num_kernels+1] = NUMCOLS;
1357 val[num_kernels+1] = 0;
1358 lpx_set_mat_row(
lp_glpk, row_index, num_kernels, ind, val);
1359 lpx_set_row_bnds(
lp_glpk, row_index, LPX_FX, 1, 1);
1367 for (int32_t q=1; q<num_kernels; q++)
1371 int mat_row_index = lpx_add_rows(
lp_glpk, 2);
1376 mat_ind[3] = num_kernels+q;
1378 lpx_set_mat_row(
lp_glpk, mat_row_index, 3, mat_ind, mat_val);
1379 lpx_set_row_bnds(
lp_glpk, mat_row_index, LPX_UP, 0, 0);
1382 lpx_set_mat_row(
lp_glpk, mat_row_index+1, 3, mat_ind, mat_val);
1383 lpx_set_row_bnds(
lp_glpk, mat_row_index+1, LPX_UP, 0, 0);
1390 int row_index = lpx_add_rows(
lp_glpk, 1);
1391 for (int32_t i=1; i<=num_kernels; i++)
1394 val[i] = -(sumw[i-1]-suma);
1396 ind[num_kernels+1] = 2*num_kernels+1;
1397 val[num_kernels+1] = -1;
1398 lpx_set_mat_row(
lp_glpk, row_index, num_kernels+1, ind, val);
1399 lpx_set_row_bnds(
lp_glpk, row_index, LPX_UP, 0, 0);
1407 SG_ERROR(
"Failed to optimize Problem.\n");
1409 int32_t cur_numrows = lpx_get_num_rows(
lp_glpk);
1410 int32_t cur_numcols = lpx_get_num_cols(
lp_glpk);
1411 int32_t num_rows=cur_numrows;
1412 ASSERT(cur_numcols<=2*num_kernels+1);
1418 for (
int i=0; i<cur_numrows; i++)
1420 row_primal[i] = lpx_get_row_prim(
lp_glpk, i+1);
1421 row_dual[i] = lpx_get_row_dual(
lp_glpk, i+1);
1423 for (
int i=0; i<cur_numcols; i++)
1424 col_primal[i] = lpx_get_col_prim(
lp_glpk, i+1);
1426 obj = -col_primal[2*num_kernels];
1428 for (
int i=0; i<num_kernels; i++)
1429 beta[i] = col_primal[i];
1431 int32_t num_active_rows=0;
1435 int32_t max_idx = -1;
1436 int32_t start_row = 1;
1438 start_row += 2*(num_kernels-1);
1440 for (int32_t i= start_row; i<cur_numrows; i++)
1446 if (row_primal[i]<max_slack)
1448 max_slack = row_primal[i];
1454 if ((num_rows-start_row>
CMath::max(100, 2*num_active_rows)) && max_idx!=-1)
1457 del_rows[1] = max_idx+1;
1458 lpx_del_rows(
lp_glpk, 1, del_rows);
1466 SG_ERROR(
"Glpk not enabled at compile time\n");
1482 ASSERT(nweights==num_kernels);
1485 for (int32_t i=0; i<num_kernels; i++)
1491 for (int32_t n=0; n<num_kernels; n++)
1498 for (int32_t i=0; i<nsv; i++)
1502 for (int32_t j=0; j<nsv; j++)
1534 for (int32_t i=0; i<n; i++)
1538 for (int32_t j=0; j<n; j++)
1555 mkl_obj=-0.5*mkl_obj;
1562 SG_ERROR(
"cannot compute objective, labels or kernel not set\n");
1578 double const_term = 1-CMath::qsq(beta, num_kernels,
mkl_norm);
1581 ASSERT(CMath::fequal(const_term, 0.0));
1583 for (int32_t i=0; i<num_kernels; i++)
1587 lin_term[i]=grad_beta[i] - 2*beta[i]*hess_beta[i];
1588 const_term+=grad_beta[i]*beta[i] -
CMath::sq(beta[i])*hess_beta[i];
1591 ind[num_kernels]=2*num_kernels;
1592 hess_beta[num_kernels]=0;
1593 lin_term[num_kernels]=0;
1604 status = CPXaddqconstr (
env,
lp_cplex, num_kernels+1, num_kernels+1, const_term,
'L', ind, lin_term,
1605 ind, ind, hess_beta, NULL);