// Computes the value of the SLEP regularizer for the current weights `w`.
// NOTE(review): this extract is elided — braces, the options.mode switch
// labels, and several statements are missing; the numbered sections below
// correspond to one mode branch each (MULTITASK_GROUP, MULTITASK_TREE,
// FEATURE_GROUP, FEATURE_TREE, PLAIN, FUSED). Confirm against full source.
// NOTE(review): n_vecs appears unused in the visible lines.
25 double compute_regularizer(
double* w,
double lambda,
double lambda2,
int n_vecs,
int n_feats,
26 int n_blocks,
const slep_options& options)
28 double regularizer = 0.0;
// --- MULTITASK_GROUP: lambda * sum_i ||w_{i,:}||_q across task blocks;
// w is addressed column-major as w[i + t*n_feats]
33 for (
int i=0; i<n_feats; i++)
35 double w_row_norm = 0.0;
36 for (
int t=0; t<n_blocks; t++)
37 w_row_norm +=
CMath::pow(w[i+t*n_feats],options.q);
// q-th root of the accumulated q-th powers gives the row's q-norm
38 regularizer +=
CMath::pow(w_row_norm,1.0/options.q);
40 regularizer *= lambda;
// --- MULTITASK_TREE: tree-structured norm of each feature row;
// general_* variant uses a user-supplied grouping G, the other the
// default tree encoding — the selecting condition is elided here
45 for (
int i=0; i<n_feats; i++)
47 double tree_norm = 0.0;
50 tree_norm = general_treeNorm(w+i, n_blocks, n_blocks, options.G, options.ind_t, options.n_nodes);
52 tree_norm = treeNorm(w+i, n_blocks, n_blocks, options.ind_t, options.n_nodes);
54 regularizer += tree_norm;
56 regularizer *= lambda;
// --- FEATURE_GROUP: weighted q-norm of each feature group; group t
// spans indices [options.ind[t], options.ind[t+1])
61 for (
int t=0; t<n_blocks; t++)
63 double group_qpow_sum = 0.0;
64 int group_ind_start = options.ind[t];
65 int group_ind_end = options.ind[t+1];
66 for (
int i=group_ind_start; i<group_ind_end; i++)
// NOTE(review): the statement accumulating group_qpow_sum is elided
69 regularizer += options.gWeight[t]*
CMath::pow(group_qpow_sum, 1.0/options.q);
71 regularizer *= lambda;
// --- FEATURE_TREE: tree norm over the full (single-task) weight vector ---
77 regularizer = general_treeNorm(w, 1, n_feats, options.G, options.ind_t, options.n_nodes);
79 regularizer = treeNorm(w, 1, n_feats, options.ind_t, options.n_nodes);
81 regularizer *= lambda;
// --- PLAIN: l1 penalty (the |w[i]| accumulation is elided) ---
86 for (
int i=0; i<n_feats; i++)
89 regularizer *= lambda;
// --- FUSED: l1 term (weight lambda) plus a total-variation term over
// adjacent features (weight lambda2); l1/fuse accumulations are elided
95 for (
int i=0; i<n_feats; i++)
97 regularizer += lambda*l1;
99 for (
int i=1; i<n_feats; i++)
101 regularizer += lambda2*fuse;
// Computes the actual regularization strength as a fraction z of the
// maximal useful lambda (lambda_max, above which the solution is all-zero).
// First builds ATx = A^T * x (a loss-dependent transform of the labels),
// then derives lambda_max from ATx per options.mode.
// NOTE(review): extract is elided — switch labels, braces, several
// assignments (e.g. of z, sum, lambda_max in some branches) are missing.
108 double compute_lambda(
111 CDotFeatures* features,
113 int n_vecs,
int n_feats,
115 const slep_options& options)
117 double lambda_max = 0.0;
// q_bar is the dual exponent of options.q (1/q + 1/q_bar = 1); clamped
// to "almost infinity" for extreme q — the selecting conditions are
// partially elided here
123 q_bar = CMath::ALMOST_INFTY;
124 else if (options.q>1e6)
127 q_bar = options.q/(options.q-1);
// --- Phase 1: accumulate ATx, per mode and per loss ---
131 switch (options.mode)
133 case MULTITASK_GROUP:
136 for (
int t=0; t<n_blocks; t++)
138 SGVector<index_t> task_idx = options.tasks_indices[t];
139 int n_vecs_task = task_idx.vlen;
141 switch (options.loss)
// LOGISTIC (presumably): first count positives/negatives of this task...
147 for (
int i=0; i<n_vecs_task; i++)
149 if (y[task_idx[i]]>0)
// ...then add each vector weighted by the class-balance factor b
154 for (
int i=0; i<n_vecs_task; i++)
156 if (y[task_idx[i]]>0)
157 b = double(m1)/(m1+m2);
159 b = -double(m2)/(m1+m2);
161 features->add_to_dense_vec(b,task_idx[i],ATx+t*n_feats,n_feats);
// LEAST_SQUARES (presumably): ATx column t += y_i * x_i
167 for (
int i=0; i<n_vecs_task; i++)
168 features->add_to_dense_vec(y[task_idx[i]],task_idx[i],ATx+t*n_feats,n_feats);
// single-task modes: same construction without the per-task loop
179 switch (options.loss)
185 for (
int i=0; i<n_vecs; i++)
186 y[i]>0 ? m1++ : m2++;
188 SG_SDEBUG(
"# pos = %d , # neg = %d\n",m1,m2)
190 for (
int i=0; i<n_vecs; i++)
// balance weight normalized by n_vecs^2 for the logistic loss
192 y[i]>0 ? b=double(m2) /
CMath::sq(n_vecs) : b=-double(m1) / CMath::sq(n_vecs);
193 features->add_to_dense_vec(b,i,ATx,n_feats);
199 for (
int i=0; i<n_vecs; i++)
200 features->add_to_dense_vec(y[i],i,ATx,n_feats);
// --- Phase 2: lambda_max from ATx, per mode ---
208 switch (options.mode)
210 case MULTITASK_GROUP:
// lambda_max = max_i || ATx_{i,:} ||_{q_bar} (dual norm of the L(q,1)
// penalty); the max-taking statement is elided
212 for (
int i=0; i<n_feats; i++)
215 for (
int t=0; t<n_blocks; t++)
216 sum +=
CMath::pow(fabs(ATx[t*n_feats+i]),q_bar);
221 if (options.loss==LOGISTIC)
222 lambda_max /= n_vecs;
// MULTITASK_TREE: delegate to the SLEP tree helpers
228 lambda_max = general_findLambdaMax_mt(ATx, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes);
230 lambda_max = findLambdaMax_mt(ATx, n_feats, n_blocks, options.ind_t, options.n_nodes);
232 lambda_max /= n_vecs*n_blocks;
// FEATURE_GROUP: per-group dual norm scaled by 1/gWeight[t]
237 for (
int t=0; t<n_blocks; t++)
239 int group_ind_start = options.ind[t];
240 int group_ind_end = options.ind[t+1];
242 for (
int i=group_ind_start; i<group_ind_end; i++)
246 sum /= options.gWeight[t];
// FEATURE_TREE: single-task tree helpers
256 lambda_max = general_findLambdaMax(ATx, n_feats, options.G, options.ind_t, options.n_nodes);
258 lambda_max = findLambdaMax(ATx, n_feats, options.ind_t, options.n_nodes);
// PLAIN/FUSED (presumably): max |ATx[i]| — statement elided
265 for (
int i=0; i<n_feats; i++)
// Returned value is z * lambda_max (z presumably = options.regularization)
275 SG_SINFO(
"Computed lambda = %f * %f = %f\n",z,lambda_max,z*lambda_max)
// Proximal-projection step of the accelerated gradient method: writes into
// `w` the projection of point `v` under the mode-specific penalty with
// step-scaled strength lambda/L. z/z0 are working buffers for flsa (FUSED).
// NOTE(review): case labels, breaks and braces are elided in this extract;
// each call below belongs to one options.mode branch.
279 void projection(
double* w,
double* v,
int n_feats,
int n_blocks,
double lambda,
double lambda2,
280 double L,
double* z,
double* z0,
const slep_options& options)
282 switch (options.mode)
// MULTITASK_GROUP: L(q,1) matrix projection
284 case MULTITASK_GROUP:
285 eppMatrix(w, v, n_feats, n_blocks, lambda/L, options.q);
// MULTITASK_TREE: tree projection, general-grouping vs. default variant
289 general_altra_mt(w, v, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes, lambda/L);
291 altra_mt(w, v, n_feats, n_blocks, options.ind_t, options.n_nodes, lambda/L);
// FEATURE_GROUP: grouped vector projection; q is capped at 1e6 to keep
// eppVector numerically stable for effectively-infinite q
294 eppVector(w, v, options.ind, n_blocks, n_feats, options.gWeight, lambda/L, options.q > 1e6 ? 1e6 : options.q);
// FEATURE_TREE: single-task tree projection
298 general_altra(w, v, n_feats, options.G, options.ind_t, options.n_nodes, lambda/L);
300 altra(w, v, n_feats, options.ind_t, options.n_nodes, lambda/L);
// PLAIN: elementwise soft-threshold (presumably) — loop body elided
303 for (
int i=0; i<n_feats; i++)
// FUSED: fused-lasso signal approximator, then a post-pass over features
307 flsa(w,z,NULL,v,z0,lambda/L,lambda2/L,n_feats,1000,1e-8,1,6);
308 for (
int i=0; i<n_feats; i++)
// Evaluates the smooth loss at the FISTA search point s (with products
// As = A*s and intercepts sc) and accumulates its gradient into g
// (features part) and gc (intercept part). Returns the objective value.
// NOTE(review): extract is elided — case labels, braces, the objective
// accumulation and parts of the least-squares branches are missing.
315 double search_point_gradient_and_objective(CDotFeatures* features,
double* ATx,
double* As,
316 double* sc,
double* y,
int n_vecs,
317 int n_feats,
int n_tasks,
318 double* g,
double* gc,
319 const slep_options& options)
324 switch (options.mode)
// MULTITASK_GROUP: per-task loops; task t owns column t of g
326 case MULTITASK_GROUP:
328 for (
int t=0; t<n_tasks; t++)
330 SGVector<index_t> task_idx = options.tasks_indices[t];
331 int n_vecs_task = task_idx.vlen;
332 switch (options.loss)
// LOGISTIC: margin aa = -y*(As+sc), gradient weight
// b = -y*(1-sigmoid)/n_vecs added per vector
336 for (
int i=0; i<n_vecs_task; i++)
338 double aa = -y[task_idx[i]]*(As[task_idx[i]]+sc[t]);
342 double b = -y[task_idx[i]]*(1.0-prob) / n_vecs;
344 features->add_to_dense_vec(b,task_idx[i],g+t*n_feats,n_feats);
// LEAST_SQUARES (presumably): reset g, then g += As[i]*x_i; the
// residual (-ATx) part is elided from this extract
348 for (
int i=0; i<n_feats*n_tasks; i++)
350 for (
int i=0; i<n_vecs_task; i++)
351 features->add_to_dense_vec(As[task_idx[i]],task_idx[i],g+t*n_feats,n_feats);
// single-task modes: same structure without the task loop; the single
// intercept lives in sc[0]
360 switch (options.loss)
365 for (
int i=0; i<n_vecs; i++)
367 double aa = -y[i]*(As[i]+sc[0]);
378 double b = -y[i]*(1.0-prob)/n_vecs;
386 features->add_to_dense_vec(b,i,g,n_feats);
391 for (
int i=0; i<n_feats; i++)
393 for (
int i=0; i<n_vecs; i++)
394 features->add_to_dense_vec(As[i],i,g,n_feats);
// Main SLEP solver: accelerated proximal gradient (FISTA-style) with
// backtracking line search on the Lipschitz estimate L. Per outer
// iteration: build search point s from w and the previous step, take a
// gradient step, project via projection(), and accept L once the quadratic
// upper bound holds (l_sum <= r_sum*L); stop on the configured termination
// criterion. Returns the learned weights w and intercepts c.
// NOTE(review): this extract is heavily elided — the outer iteration loop,
// braces, case labels and many statements are missing; comments describe
// only what the visible lines establish.
404 slep_result_t slep_solver(
405 CDotFeatures* features,
408 const slep_options& options)
// problem dimensions come from the feature object
411 int n_feats = features->get_dim_feature_space();
412 int n_vecs = features->get_num_vectors();
414 double funcp = 0.0, func = 0.0;
// n_tasks/n_blocks depend on the mode: tasks for multitask modes,
// feature blocks otherwise
419 switch (options.mode)
421 case MULTITASK_GROUP:
423 n_tasks = options.n_tasks;
424 n_blocks = options.n_tasks;
429 n_blocks = options.n_feature_blocks;
437 SG_SDEBUG(
"n_tasks = %d, n_blocks = %d\n",n_tasks,n_blocks)
438 SG_SDEBUG("n_nodes = %d\n",options.n_nodes)
442 bool gradient_break = false;
443 double rsL2 = options.rsL2;
444 double* ATx = SG_CALLOC(
double, n_feats*n_tasks);
// NOTE(review): the two lines above carry original numbers 444/446 in
// the upstream file; kept verbatim from the extract.
447 if (options.regularization!=0)
// regularization!=0 means options hold a fraction of lambda_max
449 lambda = compute_lambda(ATx, z, features, y, n_vecs, n_feats, n_blocks, options);
455 double lambda2 = 0.0;
// current iterate: weight matrix w (n_feats x n_tasks) and intercepts c
457 SGMatrix<double> w(n_feats,n_tasks);
459 SGVector<double> c(n_tasks);
// warm start from a previous run when available
462 if (options.last_result)
464 w = options.last_result->w;
465 c = options.last_result->c;
// working buffers: search point s/sc, gradient g, projection input v,
// flsa state z_flsa/z0_flsa (SG_CALLOC zero-initializes)
468 double* s = SG_CALLOC(
double, n_feats*n_tasks);
469 double* sc = SG_CALLOC(
double, n_tasks);
470 double* g = SG_CALLOC(
double, n_feats*n_tasks);
471 double* v = SG_CALLOC(
double, n_feats*n_tasks);
472 double* z_flsa = SG_CALLOC(
double, n_feats);
473 double* z0_flsa = SG_CALLOC(
double, n_feats);
// Aw = A*w, maintained incrementally through the iterations
475 double* Aw = SG_CALLOC(
double, n_vecs);
476 switch (options.mode)
478 case MULTITASK_GROUP:
481 for (t=0; t<n_blocks; t++)
483 SGVector<index_t> task_idx = options.tasks_indices[t];
485 int n_vecs_task = task_idx.vlen;
486 for (i=0; i<n_vecs_task; i++)
487 Aw[task_idx[i]] = features->dense_dot(task_idx[i],w.matrix+t*n_feats,n_feats);
496 for (i=0; i<n_vecs; i++)
497 Aw[i] = features->dense_dot(i,w.matrix,n_feats);
502 double* Av = SG_MALLOC(
double, n_vecs);
503 double* As = SG_MALLOC(
double, n_vecs);
// initial Lipschitz estimate; grown by the line search as needed
505 double L = 1.0/n_vecs;
507 if (options.mode==FUSED)
// previous-iterate copies (wp/cp), differences (wwp/ccp) and their
// products with A (Awp) drive the momentum term
510 double* wp = SG_CALLOC(
double, n_feats*n_tasks);
511 for (i=0; i<n_feats*n_tasks; i++)
513 double* Awp = SG_MALLOC(
double, n_vecs);
514 for (i=0; i<n_vecs; i++)
516 double* wwp = SG_CALLOC(
double, n_feats*n_tasks);
518 double* cp = SG_MALLOC(
double, n_tasks);
519 for (t=0; t<n_tasks; t++)
521 double* ccp = SG_CALLOC(
double, n_tasks);
523 double* gc = SG_MALLOC(
double, n_tasks);
// FISTA momentum coefficients
524 double alphap = 0.0, alpha = 1.0;
// --- per-iteration body (outer loop header elided) ---
529 beta = (alphap-1.0)/alpha;
// search point: s = w + beta*(w - wp), same for intercepts and A-products
531 for (i=0; i<n_feats*n_tasks; i++)
532 s[i] = w[i] + beta*wwp[i];
533 for (t=0; t<n_tasks; t++)
534 sc[t] = c[t] + beta*ccp[t];
535 for (i=0; i<n_vecs; i++)
536 As[i] = Aw[i] + beta*(Aw[i]-Awp[i]);
537 for (i=0; i<n_tasks*n_feats; i++)
// loss and gradient at the search point
540 double fun_s = search_point_gradient_and_objective(features, ATx, As, sc, y, n_vecs, n_feats, n_tasks, g, gc, options);
// ridge (rsL2) contribution for the plain/fused modes
544 if (options.mode==PLAIN || options.mode==FUSED)
545 fun_s += rsL2/2 *
CMath::dot(w.matrix,w.matrix,n_feats);
// snapshot current iterate before the line search (bodies elided)
547 for (i=0; i<n_feats*n_tasks; i++)
549 for (t=0; t<n_tasks; t++)
551 for (i=0; i<n_vecs; i++)
// --- backtracking line search over L ---
555 while (inner_iter <= 1000)
// gradient step v = s - g/L, then project onto the penalty
557 for (i=0; i<n_feats*n_tasks; i++)
558 v[i] = s[i] - g[i]*(1.0/L);
560 for (t=0; t<n_tasks; t++)
561 c[t] = sc[t] - gc[t]*(1.0/L);
563 projection(w.matrix,v,n_feats,n_blocks,lambda,lambda2,L,z_flsa,z0_flsa,options);
// v is reused as w - s for the sufficient-decrease test (body elided)
565 for (i=0; i<n_feats*n_tasks; i++)
// recompute Aw and (for logistic) the loss at the candidate w
569 switch (options.mode)
571 case MULTITASK_GROUP:
573 for (t=0; t<n_blocks; t++)
575 SGVector<index_t> task_idx = options.tasks_indices[t];
576 int n_vecs_task = task_idx.vlen;
577 for (i=0; i<n_vecs_task; i++)
579 Aw[task_idx[i]] = features->dense_dot(task_idx[i],w.matrix+t*n_feats,n_feats);
580 if (options.loss==LOGISTIC)
582 double aa = -y[task_idx[i]]*(Aw[task_idx[i]]+c[t]);
593 for (i=0; i<n_vecs; i++)
595 Aw[i] = features->dense_dot(i, w.matrix, n_feats);
596 if (options.loss==LOGISTIC)
598 double aa = -y[i]*(Aw[i]+c[0]);
608 if (options.loss==LOGISTIC)
610 if (options.mode==PLAIN || options.mode==FUSED)
611 fun_x += rsL2/2 *
CMath::dot(w.matrix,w.matrix,n_feats);
// l_sum/r_sum form the quadratic-upper-bound (Armijo-like) test
613 double l_sum = 0.0, r_sum = 0.0;
614 switch (options.loss)
618 l_sum = fun_x - fun_s -
CMath::dot(v,g,n_feats*n_tasks);
619 for (t=0; t<n_tasks; t++)
622 l_sum -= (c[t] - sc[t])*gc[t];
628 for (i=0; i<n_vecs; i++)
// vanishing gradient => terminate outer loop (condition elided)
635 gradient_break =
true;
// accept L when the upper bound holds; otherwise L is increased
// (the increase statement is elided)
639 if (l_sum <= r_sum*L)
// record step differences for the next momentum term
648 for (i=0; i<n_feats*n_tasks; i++)
649 wwp[i] = w[i] - wp[i];
650 for (t=0; t<n_tasks; t++)
651 ccp[t] = c[t] - cp[t];
652 double regularizer = compute_regularizer(w.matrix, lambda, lambda2, n_vecs, n_feats, n_blocks, options);
// full objective = smooth loss + regularizer (least-squares loss
// evaluation body elided)
655 if (options.loss==LOGISTIC)
657 func = fun_x + regularizer;
659 if (options.loss==LEAST_SQUARES)
662 for (i=0; i<n_vecs; i++)
665 SG_SDEBUG(
"Obj = %f + %f = %f \n",fun_x, regularizer, func)
669 SG_SINFO(
"Gradient norm is less than 1e-20\n")
// --- termination criteria, selected by options.termination ---
673 double norm_wp, norm_wwp;
675 switch (options.termination)
// absolute objective change
681 if (step <= options.tolerance)
683 SG_SINFO(
"Objective changes less than tolerance\n")
// relative objective change
// NOTE(review): `step <= step*options.tolerance` compares step to
// itself — in the upstream source the left side is the objective
// change; likely an artifact of this extract. Verify upstream.
692 if (step <= step*options.tolerance)
694 SG_SINFO(
"Objective changes relatively less than tolerance\n")
// absolute objective value
700 if (func <= options.tolerance)
702 SG_SINFO(
"Objective is less than tolerance\n")
// absolute / relative step norm ||w - wp||
707 norm_wwp = CMath::sqrt(CMath::
dot(wwp,wwp,n_feats*n_tasks));
708 if (norm_wwp <= options.tolerance)
712 norm_wp = CMath::sqrt(CMath::
dot(wp,wp,n_feats*n_tasks));
713 norm_wwp = CMath::sqrt(CMath::
dot(wwp,wwp,n_feats*n_tasks));
714 if (norm_wwp <= options.tolerance*CMath::max(norm_wp,1.0))
723 SG_SINFO("Finished %d iterations, objective = %f\n", iter, func)
// (buffer SG_FREEs elided) — results returned by value
742 return slep_result_t(w,c);
746 #endif //USE_GPL_SHOGUN
Vector::Scalar dot(Vector a, Vector b)
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
static bool cancel_computations()
all of classes and functions are contained in the shogun namespace
static float64_t exp(float64_t x)
static float64_t log(float64_t v)
Matrix::Scalar max(Matrix m)
static float32_t sqrt(float32_t x)
static int32_t pow(bool x, int32_t n)