24 int n_blocks,
const slep_options& options)
26 double regularizer = 0.0;
31 for (
int i=0; i<n_feats; i++)
33 double w_row_norm = 0.0;
34 for (
int t=0; t<n_blocks; t++)
35 w_row_norm +=
CMath::pow(w[i+t*n_feats],options.q);
36 regularizer +=
CMath::pow(w_row_norm,1.0/options.q);
38 regularizer *= lambda;
43 for (
int i=0; i<n_feats; i++)
45 double tree_norm = 0.0;
48 tree_norm =
general_treeNorm(w+i, n_blocks, n_blocks, options.G, options.ind_t, options.n_nodes);
50 tree_norm =
treeNorm(w+i, n_blocks, n_blocks, options.ind_t, options.n_nodes);
52 regularizer += tree_norm;
54 regularizer *= lambda;
59 for (
int t=0; t<n_blocks; t++)
61 double group_qpow_sum = 0.0;
62 int group_ind_start = options.ind[t];
63 int group_ind_end = options.ind[t+1];
64 for (
int i=group_ind_start; i<group_ind_end; i++)
67 regularizer += options.gWeight[t]*
CMath::pow(group_qpow_sum, 1.0/options.q);
69 regularizer *= lambda;
75 regularizer =
general_treeNorm(w, 1, n_feats, options.G, options.ind_t, options.n_nodes);
77 regularizer =
treeNorm(w, 1, n_feats, options.ind_t, options.n_nodes);
79 regularizer *= lambda;
84 for (
int i=0; i<n_feats; i++)
87 regularizer *= lambda;
93 for (
int i=0; i<n_feats; i++)
95 regularizer += lambda*l1;
97 for (
int i=1; i<n_feats; i++)
99 regularizer += lambda2*fuse;
111 int n_vecs,
int n_feats,
113 const slep_options& options)
115 double lambda_max = 0.0;
122 else if (options.q>1e6)
125 q_bar = options.q/(options.q-1);
129 switch (options.mode)
131 case MULTITASK_GROUP:
134 for (
int t=0; t<n_blocks; t++)
137 int n_vecs_task = task_idx.
vlen;
139 switch (options.loss)
145 for (
int i=0; i<n_vecs_task; i++)
147 if (y[task_idx[i]]>0)
152 for (
int i=0; i<n_vecs_task; i++)
154 if (y[task_idx[i]]>0)
155 b = double(m1)/(m1+m2);
157 b = -double(m2)/(m1+m2);
165 for (
int i=0; i<n_vecs_task; i++)
166 features->
add_to_dense_vec(y[task_idx[i]],task_idx[i],ATx+t*n_feats,n_feats);
177 switch (options.loss)
183 for (
int i=0; i<n_vecs; i++)
184 y[i]>0 ? m1++ : m2++;
186 SG_SDEBUG(
"# pos = %d , # neg = %d\n",m1,m2)
188 for (
int i=0; i<n_vecs; i++)
197 for (
int i=0; i<n_vecs; i++)
206 switch (options.mode)
208 case MULTITASK_GROUP:
210 for (
int i=0; i<n_feats; i++)
213 for (
int t=0; t<n_blocks; t++)
214 sum +=
CMath::pow(fabs(ATx[t*n_feats+i]),q_bar);
219 if (options.loss==LOGISTIC)
220 lambda_max /= n_vecs;
228 lambda_max =
findLambdaMax_mt(ATx, n_feats, n_blocks, options.ind_t, options.n_nodes);
230 lambda_max /= n_vecs*n_blocks;
235 for (
int t=0; t<n_blocks; t++)
237 int group_ind_start = options.ind[t];
238 int group_ind_end = options.ind[t+1];
240 for (
int i=group_ind_start; i<group_ind_end; i++)
244 sum /= options.gWeight[t];
256 lambda_max =
findLambdaMax(ATx, n_feats, options.ind_t, options.n_nodes);
263 for (
int i=0; i<n_feats; i++)
273 SG_SINFO(
"Computed lambda = %f * %f = %f\n",z,lambda_max,z*lambda_max)
277 void projection(
double* w,
double* v,
int n_feats,
int n_blocks,
double lambda,
double lambda2,
278 double L,
double* z,
double* z0,
const slep_options& options)
280 switch (options.mode)
282 case MULTITASK_GROUP:
283 eppMatrix(w, v, n_feats, n_blocks, lambda/L, options.q);
287 general_altra_mt(w, v, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes, lambda/L);
289 altra_mt(w, v, n_feats, n_blocks, options.ind_t, options.n_nodes, lambda/L);
292 eppVector(w, v, options.ind, n_blocks, n_feats, options.gWeight, lambda/L, options.q > 1e6 ? 1e6 : options.q);
296 general_altra(w, v, n_feats, options.G, options.ind_t, options.n_nodes, lambda/L);
298 altra(w, v, n_feats, options.ind_t, options.n_nodes, lambda/L);
301 for (
int i=0; i<n_feats; i++)
305 flsa(w,z,NULL,v,z0,lambda/L,lambda2/L,n_feats,1000,1e-8,1,6);
306 for (
int i=0; i<n_feats; i++)
314 double* sc,
double* y,
int n_vecs,
315 int n_feats,
int n_tasks,
316 double* g,
double* gc,
317 const slep_options& options)
322 switch (options.mode)
324 case MULTITASK_GROUP:
326 for (
int t=0; t<n_tasks; t++)
329 int n_vecs_task = task_idx.
vlen;
330 switch (options.loss)
334 for (
int i=0; i<n_vecs_task; i++)
336 double aa = -y[task_idx[i]]*(As[task_idx[i]]+sc[t]);
340 double b = -y[task_idx[i]]*(1.0-prob) / n_vecs;
346 for (
int i=0; i<n_feats*n_tasks; i++)
348 for (
int i=0; i<n_vecs_task; i++)
349 features->
add_to_dense_vec(As[task_idx[i]],task_idx[i],g+t*n_feats,n_feats);
358 switch (options.loss)
363 for (
int i=0; i<n_vecs; i++)
365 double aa = -y[i]*(As[i]+sc[0]);
376 double b = -y[i]*(1.0-prob)/n_vecs;
389 for (
int i=0; i<n_feats; i++)
391 for (
int i=0; i<n_vecs; i++)
406 const slep_options& options)
412 double funcp = 0.0, func = 0.0;
417 switch (options.mode)
419 case MULTITASK_GROUP:
421 n_tasks = options.n_tasks;
422 n_blocks = options.n_tasks;
427 n_blocks = options.n_feature_blocks;
435 SG_SDEBUG(
"n_tasks = %d, n_blocks = %d\n",n_tasks,n_blocks)
436 SG_SDEBUG(
"n_nodes = %d\n",options.n_nodes)
440 bool gradient_break =
false;
442 double rsL2 = options.rsL2;
444 double* ATx = SG_CALLOC(
double, n_feats*n_tasks);
445 if (options.regularization!=0)
447 lambda =
compute_lambda(ATx, z, features, y, n_vecs, n_feats, n_blocks, options);
453 double lambda2 = 0.0;
460 if (options.last_result)
462 w = options.last_result->w;
463 c = options.last_result->c;
466 double* s = SG_CALLOC(
double, n_feats*n_tasks);
467 double* sc = SG_CALLOC(
double, n_tasks);
468 double* g = SG_CALLOC(
double, n_feats*n_tasks);
469 double* v = SG_CALLOC(
double, n_feats*n_tasks);
470 double* z_flsa = SG_CALLOC(
double, n_feats);
471 double* z0_flsa = SG_CALLOC(
double, n_feats);
473 double* Aw = SG_CALLOC(
double, n_vecs);
474 switch (options.mode)
476 case MULTITASK_GROUP:
479 for (t=0; t<n_blocks; t++)
483 int n_vecs_task = task_idx.
vlen;
484 for (i=0; i<n_vecs_task; i++)
485 Aw[task_idx[i]] = features->
dense_dot(task_idx[i],w.
matrix+t*n_feats,n_feats);
494 for (i=0; i<n_vecs; i++)
500 double* Av = SG_MALLOC(
double, n_vecs);
501 double* As = SG_MALLOC(
double, n_vecs);
503 double L = 1.0/n_vecs;
505 if (options.mode==FUSED)
508 double* wp = SG_CALLOC(
double, n_feats*n_tasks);
509 for (i=0; i<n_feats*n_tasks; i++)
511 double* Awp = SG_MALLOC(
double, n_vecs);
512 for (i=0; i<n_vecs; i++)
514 double* wwp = SG_CALLOC(
double, n_feats*n_tasks);
516 double* cp = SG_MALLOC(
double, n_tasks);
517 for (t=0; t<n_tasks; t++)
519 double* ccp = SG_CALLOC(
double, n_tasks);
521 double* gc = SG_MALLOC(
double, n_tasks);
522 double alphap = 0.0, alpha = 1.0;
527 beta = (alphap-1.0)/alpha;
529 for (i=0; i<n_feats*n_tasks; i++)
530 s[i] = w[i] + beta*wwp[i];
531 for (t=0; t<n_tasks; t++)
532 sc[t] = c[t] + beta*ccp[t];
533 for (i=0; i<n_vecs; i++)
534 As[i] = Aw[i] + beta*(Aw[i]-Awp[i]);
535 for (i=0; i<n_tasks*n_feats; i++)
538 double fun_s =
search_point_gradient_and_objective(features, ATx, As, sc, y, n_vecs, n_feats, n_tasks, g, gc, options);
542 if (options.mode==PLAIN || options.mode==FUSED)
545 for (i=0; i<n_feats*n_tasks; i++)
547 for (t=0; t<n_tasks; t++)
549 for (i=0; i<n_vecs; i++)
553 while (inner_iter <= 1000)
555 for (i=0; i<n_feats*n_tasks; i++)
556 v[i] = s[i] - g[i]*(1.0/L);
558 for (t=0; t<n_tasks; t++)
559 c[t] = sc[t] - gc[t]*(1.0/L);
561 projection(w.
matrix,v,n_feats,n_blocks,lambda,lambda2,L,z_flsa,z0_flsa,options);
563 for (i=0; i<n_feats*n_tasks; i++)
567 switch (options.mode)
569 case MULTITASK_GROUP:
571 for (t=0; t<n_blocks; t++)
574 int n_vecs_task = task_idx.
vlen;
575 for (i=0; i<n_vecs_task; i++)
577 Aw[task_idx[i]] = features->
dense_dot(task_idx[i],w.
matrix+t*n_feats,n_feats);
578 if (options.loss==LOGISTIC)
580 double aa = -y[task_idx[i]]*(Aw[task_idx[i]]+c[t]);
591 for (i=0; i<n_vecs; i++)
594 if (options.loss==LOGISTIC)
596 double aa = -y[i]*(Aw[i]+c[0]);
606 if (options.loss==LOGISTIC)
608 if (options.mode==PLAIN || options.mode==FUSED)
611 double l_sum = 0.0, r_sum = 0.0;
612 switch (options.loss)
616 l_sum = fun_x - fun_s -
CMath::dot(v,g,n_feats*n_tasks);
617 for (t=0; t<n_tasks; t++)
620 l_sum -= (c[t] - sc[t])*gc[t];
626 for (i=0; i<n_vecs; i++)
633 gradient_break =
true;
637 if (l_sum <= r_sum*L)
646 for (i=0; i<n_feats*n_tasks; i++)
647 wwp[i] = w[i] - wp[i];
648 for (t=0; t<n_tasks; t++)
649 ccp[t] = c[t] - cp[t];
653 if (options.loss==LOGISTIC)
655 func = fun_x + regularizer;
657 if (options.loss==LEAST_SQUARES)
660 for (i=0; i<n_vecs; i++)
663 SG_SDEBUG(
"Obj = %f + %f = %f \n",fun_x, regularizer, func)
667 SG_SINFO(
"Gradient norm is less than 1e-20\n")
671 double norm_wp, norm_wwp;
673 switch (options.termination)
679 if (step <= options.tolerance)
681 SG_SINFO(
"Objective changes less than tolerance\n")
690 if (step <= step*options.tolerance)
692 SG_SINFO(
"Objective changes relatively less than tolerance\n")
698 if (func <= options.tolerance)
700 SG_SINFO(
"Objective is less than tolerance\n")
706 if (norm_wwp <= options.tolerance)
712 if (norm_wwp <= options.tolerance*
CMath::max(norm_wp,1.0))
721 SG_SINFO(
"Finished %d iterations, objective = %f\n", iter, func)
740 return slep_result_t(w,c);
double treeNorm(double *x, int ldx, int n, double *ind, int nodes)
void eppVector(double *x, double *v, int *ind, int k, int n, double *rho, double rho_multiplier, double p)
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
virtual int32_t get_num_vectors() const =0
virtual void add_to_dense_vec(float64_t alpha, int32_t vec_idx1, float64_t *vec2, int32_t vec2_len, bool abs_val=false)=0
void general_altra(double *x, double *v, int n, double *G, double *ind, int nodes, double mult)
Features that support dot products among other operations.
virtual int32_t get_dim_feature_space() const =0
double general_treeNorm(double *x, int ldx, int n, double *G, double *ind, int nodes)
double compute_regularizer(double *w, double lambda, double lambda2, int n_vecs, int n_feats, int n_blocks, const slep_options &options)
double search_point_gradient_and_objective(CDotFeatures *features, double *ATx, double *As, double *sc, double *y, int n_vecs, int n_feats, int n_tasks, double *g, double *gc, const slep_options &options)
void eppMatrix(double *X, double *V, int k, int n, double rho, double p)
void general_altra_mt(double *X, double *V, int n, int k, double *G, double *ind, int nodes, double mult)
double general_findLambdaMax_mt(double *V, int n, int k, double *G, double *ind, int nodes)
double compute_lambda(double *ATx, double z, CDotFeatures *features, double *y, int n_vecs, int n_feats, int n_blocks, const slep_options &options)
static float64_t dot(const bool *v1, const bool *v2, int32_t n)
Compute dot product between v1 and v2 (blas optimized)
static bool cancel_computations()
void altra(double *x, double *v, int n, double *ind, int nodes, double mult)
slep_result_t slep_solver(CDotFeatures *features, double *y, double z, const slep_options &options)
double findLambdaMax(double *v, int n, double *ind, int nodes)
all of classes and functions are contained in the shogun namespace
void flsa(double *x, double *z, double *infor, double *v, double *z0, double lambda1, double lambda2, int n, int maxStep, double tol, int tau, int flag)
static float64_t exp(float64_t x)
double findLambdaMax_mt(double *V, int n, int k, double *ind, int nodes)
static float64_t log(float64_t v)
static const float64_t ALMOST_INFTY
Matrix::Scalar max(Matrix m)
static float32_t sqrt(float32_t x)
void altra_mt(double *X, double *V, int n, int k, double *ind, int nodes, double mult)
double general_findLambdaMax(double *v, int n, double *G, double *ind, int nodes)
static int32_t pow(bool x, int32_t n)
void projection(double *w, double *v, int n_feats, int n_blocks, double lambda, double lambda2, double L, double *z, double *z0, const slep_options &options)