slep_solver.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Sergey Lisitsyn
00008  * Copyright (C) 2010-2012 Jun Liu, Jieping Ye
00009  */
00010 
00011 #include <shogun/lib/slep/slep_solver.h>
00012 #include <shogun/mathematics/Math.h>
00013 #include <shogun/lib/slep/q1/eppMatrix.h>
00014 #include <shogun/lib/slep/q1/eppVector.h>
00015 #include <shogun/lib/slep/flsa/flsa.h>
00016 #include <shogun/lib/slep/tree/altra.h>
00017 #include <shogun/lib/slep/tree/general_altra.h>
00018 #include <shogun/lib/Signal.h>
00019 
00020 namespace shogun
00021 {
00022 
00023 double compute_regularizer(double* w, double lambda, double lambda2, int n_vecs, int n_feats, 
00024                            int n_blocks, const slep_options& options)
00025 {
00026     double regularizer = 0.0;
00027     switch (options.mode)
00028     {
00029         case MULTITASK_GROUP:
00030         {
00031             for (int i=0; i<n_feats; i++)
00032             {
00033                 double w_row_norm = 0.0;
00034                 for (int t=0; t<n_blocks; t++)
00035                     w_row_norm += CMath::pow(w[i+t*n_feats],options.q);
00036                 regularizer += CMath::pow(w_row_norm,1.0/options.q);
00037             }
00038             regularizer *= lambda;
00039         }
00040         break;
00041         case MULTITASK_TREE:
00042         {
00043             for (int i=0; i<n_feats; i++)
00044             {
00045                 double tree_norm = 0.0;
00046 
00047                 if (options.general)
00048                     tree_norm = general_treeNorm(w+i, n_blocks, n_blocks, options.G, options.ind_t, options.n_nodes);
00049                 else
00050                     tree_norm = treeNorm(w+i, n_blocks, n_blocks, options.ind_t, options.n_nodes);
00051 
00052                 regularizer += tree_norm;
00053             }
00054             regularizer *= lambda;
00055         }
00056         break;
00057         case FEATURE_GROUP:
00058         {
00059             for (int t=0; t<n_blocks; t++)
00060             {
00061                 double group_qpow_sum = 0.0;
00062                 int group_ind_start = options.ind[t];
00063                 int group_ind_end = options.ind[t+1];
00064                 for (int i=group_ind_start; i<group_ind_end; i++)
00065                     group_qpow_sum += CMath::pow(w[i], options.q);
00066 
00067                 regularizer += options.gWeight[t]*CMath::pow(group_qpow_sum, 1.0/options.q);
00068             }
00069             regularizer *= lambda;
00070         }
00071         break;
00072         case FEATURE_TREE:
00073         {
00074             if (options.general)
00075                 regularizer = general_treeNorm(w, 1, n_feats, options.G, options.ind_t, options.n_nodes);
00076             else
00077                 regularizer = treeNorm(w, 1, n_feats, options.ind_t, options.n_nodes);
00078 
00079             regularizer *= lambda;
00080         }
00081         break;
00082         case PLAIN:
00083         {
00084             for (int i=0; i<n_feats; i++)
00085                 regularizer += CMath::abs(w[i]);
00086 
00087             regularizer *= lambda;
00088         }
00089         break;
00090         case FUSED:
00091         {
00092             double l1 = 0.0;
00093             for (int i=0; i<n_feats; i++)
00094                 l1 += CMath::abs(w[i]);
00095             regularizer += lambda*l1;
00096             double fuse = 0.0;
00097             for (int i=1; i<n_feats; i++)
00098                 fuse += CMath::abs(w[i]-w[i-1]);
00099             regularizer += lambda2*fuse;
00100         }
00101         break;
00102     }
00103     return regularizer;
00104 };
00105 
00106 double compute_lambda(
00107         double* ATx,
00108         double z, 
00109         CDotFeatures* features, 
00110         double* y, 
00111         int n_vecs, int n_feats, 
00112         int n_blocks,
00113         const slep_options& options)
00114 {
00115     double lambda_max = 0.0;
00116     if (z<0 || z>1)
00117         SG_SERROR("z is not in range [0,1]");
00118 
00119     double q_bar = 0.0;
00120     if (options.q==1)
00121         q_bar = CMath::ALMOST_INFTY;
00122     else if (options.q>1e6)
00123         q_bar = 1;
00124     else 
00125         q_bar = options.q/(options.q-1);
00126 
00127     SG_SINFO("q bar = %f \n",q_bar);
00128 
00129     switch (options.mode)
00130     {
00131         case MULTITASK_GROUP:
00132         case MULTITASK_TREE:
00133         {
00134             for (int t=0; t<n_blocks; t++)
00135             {
00136                 SGVector<index_t> task_idx = options.tasks_indices[t];
00137                 int n_vecs_task = task_idx.vlen;
00138             
00139                 switch (options.loss)
00140                 {
00141                     case LOGISTIC:
00142                     {
00143                         double b = 0.0;
00144                         int m1 = 0, m2 = 0;
00145                         for (int i=0; i<n_vecs_task; i++)
00146                         {
00147                             if (y[task_idx[i]]>0)
00148                                 m1++;
00149                             else
00150                                 m2++;
00151                         }
00152                         for (int i=0; i<n_vecs_task; i++)
00153                         {
00154                             if (y[task_idx[i]]>0)
00155                                 b = double(m1)/(m1+m2);
00156                             else
00157                                 b = -double(m2)/(m1+m2);
00158 
00159                             features->add_to_dense_vec(b,task_idx[i],ATx+t*n_feats,n_feats);
00160                         }
00161                     }
00162                     break;
00163                     case LEAST_SQUARES:
00164                     {
00165                         for (int i=0; i<n_vecs_task; i++)
00166                             features->add_to_dense_vec(y[task_idx[i]],task_idx[i],ATx+t*n_feats,n_feats);
00167                     }
00168                 }
00169             }
00170         }
00171         break;
00172         case FEATURE_GROUP:
00173         case FEATURE_TREE:
00174         case PLAIN:
00175         case FUSED:
00176         {
00177             switch (options.loss)
00178             {
00179                 case LOGISTIC:
00180                 {
00181                     int m1 = 0, m2 = 0;
00182                     double b = 0.0;
00183                     for (int i=0; i<n_vecs; i++)
00184                         y[i]>0 ? m1++ : m2++;
00185 
00186                     SG_SDEBUG("# pos = %d , # neg = %d\n",m1,m2);
00187 
00188                     for (int i=0; i<n_vecs; i++)
00189                     {
00190                         y[i]>0 ? b=double(m2) / CMath::sq(n_vecs) : b=-double(m1) / CMath::sq(n_vecs);
00191                         features->add_to_dense_vec(b,i,ATx,n_feats);
00192                     }
00193                 }
00194                 break;
00195                 case LEAST_SQUARES:
00196                 {
00197                     for (int i=0; i<n_vecs; i++)
00198                         features->add_to_dense_vec(y[i],i,ATx,n_feats);
00199                 }
00200                 break;
00201             }
00202         }
00203         break;
00204     }
00205 
00206     switch (options.mode)
00207     {
00208         case MULTITASK_GROUP:
00209         {
00210             for (int i=0; i<n_feats; i++)
00211             {
00212                 double sum = 0.0;
00213                 for (int t=0; t<n_blocks; t++)
00214                     sum += CMath::pow(fabs(ATx[t*n_feats+i]),q_bar);
00215                 lambda_max = 
00216                     CMath::max(lambda_max, CMath::pow(sum,1.0/q_bar)); 
00217             }
00218 
00219             if (options.loss==LOGISTIC) 
00220                 lambda_max /= n_vecs;
00221         }
00222         break;
00223         case MULTITASK_TREE:
00224         {
00225             if (options.general)
00226                 lambda_max = general_findLambdaMax_mt(ATx, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes);
00227             else
00228                 lambda_max = findLambdaMax_mt(ATx, n_feats, n_blocks, options.ind_t, options.n_nodes);
00229 
00230             lambda_max /= n_vecs*n_blocks;
00231         }
00232         break;
00233         case FEATURE_GROUP:
00234         {
00235             for (int t=0; t<n_blocks; t++)
00236             {
00237                 int group_ind_start = options.ind[t];
00238                 int group_ind_end = options.ind[t+1];
00239                 double sum = 0.0;
00240                 for (int i=group_ind_start; i<group_ind_end; i++)
00241                     sum += CMath::pow(fabs(ATx[i]),q_bar);
00242                 
00243                 sum = CMath::pow(sum, 1.0/q_bar);
00244                 sum /= options.gWeight[t];
00245                 SG_SINFO("sum = %f\n",sum);
00246                 if (sum>lambda_max)
00247                     lambda_max = sum;
00248             }
00249         }
00250         break;
00251         case FEATURE_TREE:
00252         {
00253             if (options.general)
00254                 lambda_max = general_findLambdaMax(ATx, n_feats, options.G, options.ind_t, options.n_nodes);
00255             else
00256                 lambda_max = findLambdaMax(ATx, n_feats, options.ind_t, options.n_nodes);
00257         }
00258         break;
00259         case PLAIN:
00260         case FUSED:
00261         {
00262             double max = 0.0;
00263             for (int i=0; i<n_feats; i++)
00264             {
00265                 if (CMath::abs(ATx[i]) > max)
00266                     max = CMath::abs(ATx[i]);
00267             }
00268             lambda_max = max;
00269         }
00270         break;
00271     }
00272 
00273     SG_SINFO("Computed lambda = %f * %f = %f\n",z,lambda_max,z*lambda_max);
00274     return z*lambda_max;
00275 }
00276 
00277 void projection(double* w, double* v, int n_feats, int n_blocks, double lambda, double lambda2,  
00278                 double L, double* z, double* z0, const slep_options& options)
00279 {
00280     switch (options.mode)
00281     {
00282         case MULTITASK_GROUP:
00283             eppMatrix(w, v, n_feats, n_blocks, lambda/L, options.q);
00284         break;
00285         case MULTITASK_TREE:
00286             if (options.general)
00287                 general_altra_mt(w, v, n_feats, n_blocks, options.G, options.ind_t, options.n_nodes, lambda/L);
00288             else
00289                 altra_mt(w, v, n_feats, n_blocks, options.ind_t, options.n_nodes, lambda/L);
00290         break;
00291         case FEATURE_GROUP:
00292             eppVector(w, v, options.ind, n_blocks, n_feats, options.gWeight, lambda/L, options.q > 1e6 ? 1e6 : options.q);
00293         break;
00294         case FEATURE_TREE:
00295             if (options.general)
00296                 general_altra(w, v, n_feats, options.G, options.ind_t, options.n_nodes, lambda/L);
00297             else
00298                 altra(w, v, n_feats, options.ind_t, options.n_nodes, lambda/L);
00299         break;
00300         case PLAIN:
00301             for (int i=0; i<n_feats; i++)
00302                 w[i] = CMath::sign(v[i])*CMath::max(0.0,CMath::abs(v[i])-lambda/L);
00303         break;
00304         case FUSED:
00305             flsa(w,z,NULL,v,z0,lambda/L,lambda2/L,n_feats,1000,1e-8,1,6);
00306             for (int i=0; i<n_feats; i++)
00307                 z0[i] = z[i];
00308         break;
00309     }
00310 
00311 }
00312 
00313 double search_point_gradient_and_objective(CDotFeatures* features, double* ATx, double* As, 
00314                                            double* sc, double* y, int n_vecs, 
00315                                            int n_feats, int n_tasks,
00316                                            double* g, double* gc,
00317                                            const slep_options& options)
00318 {
00319     double fun_s = 0.0;
00320     //SG_SDEBUG("As=%f\n", SGVector<float64_t>::dot(As,As,n_vecs));
00321     //SG_SDEBUG("sc=%f\n", SGVector<float64_t>::dot(sc,sc,n_tasks));
00322     switch (options.mode)
00323     {
00324         case MULTITASK_GROUP:
00325         case MULTITASK_TREE:
00326             for (int t=0; t<n_tasks; t++)
00327             {
00328                 SGVector<index_t> task_idx = options.tasks_indices[t];
00329                 int n_vecs_task = task_idx.vlen;
00330                 switch (options.loss)
00331                 {
00332                     case LOGISTIC:
00333                         gc[t] = 0.0;
00334                         for (int i=0; i<n_vecs_task; i++)
00335                         {
00336                             double aa = -y[task_idx[i]]*(As[task_idx[i]]+sc[t]);
00337                             double bb = CMath::max(aa,0.0);
00338                             fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)/ n_vecs;
00339                             double prob = 1.0/(1.0+CMath::exp(aa));
00340                             double b = -y[task_idx[i]]*(1.0-prob) / n_vecs;
00341                             gc[t] += b;
00342                             features->add_to_dense_vec(b,task_idx[i],g+t*n_feats,n_feats);
00343                         }
00344                     break;
00345                     case LEAST_SQUARES:
00346                         for (int i=0; i<n_feats*n_tasks; i++)
00347                             g[i] = -ATx[i];
00348                         for (int i=0; i<n_vecs_task; i++)
00349                             features->add_to_dense_vec(As[task_idx[i]],task_idx[i],g+t*n_feats,n_feats);
00350                     break;
00351                 }
00352             }
00353         break;
00354         case FEATURE_GROUP:
00355         case FEATURE_TREE:
00356         case PLAIN:
00357         case FUSED:
00358             switch (options.loss)
00359             {
00360                 case LOGISTIC:
00361                     gc[0] = 0.0;
00362 
00363                     for (int i=0; i<n_vecs; i++)
00364                     {
00365                         double aa = -y[i]*(As[i]+sc[0]);
00366                         double bb = CMath::max(aa,0.0);
00367                         fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);
00368                         /*
00369                         if (y[i]>0)
00370                             fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*pos_weight;
00371                         else
00372                             fun_s += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb)*neg_weight;
00373                         */
00374                         double prob = 1.0/(1.0+CMath::exp(aa));
00375                         //double b = 0;
00376                         double b = -y[i]*(1.0-prob)/n_vecs;
00377                         /*
00378                         if (y[i]>0)
00379                             b = -y[i]*(1.0-prob)*pos_weight;
00380                         else
00381                             b = -y[i]*(1.0-prob)*neg_weight;
00382                         */
00383                         gc[0] += b;
00384                         features->add_to_dense_vec(b,i,g,n_feats);
00385                     }
00386                     fun_s /= n_vecs;
00387                 break;
00388                 case LEAST_SQUARES:
00389                     for (int i=0; i<n_feats; i++)
00390                         g[i] = -ATx[i];
00391                     for (int i=0; i<n_vecs; i++)
00392                         features->add_to_dense_vec(As[i],i,g,n_feats);
00393                 break;
00394             }
00395         break;
00396     }
00397     SG_SDEBUG("G=%f\n", SGVector<float64_t>::dot(g,g,n_feats*n_tasks));
00398 
00399     return fun_s;
00400 }
00401 
00402 slep_result_t slep_solver(
00403         CDotFeatures* features,
00404         double* y,
00405         double z,
00406         const slep_options& options)
00407 {
00408     int i,t;
00409     int n_feats = features->get_dim_feature_space();
00410     int n_vecs = features->get_num_vectors();
00411     double lambda, beta;
00412     double funcp = 0.0, func = 0.0;
00413 
00414     int n_blocks = 0;
00415     int n_tasks = 0;
00416 
00417     switch (options.mode)
00418     {
00419         case MULTITASK_GROUP:
00420         case MULTITASK_TREE:
00421             n_tasks = options.n_tasks;
00422             n_blocks = options.n_tasks;
00423         break;
00424         case FEATURE_GROUP:
00425         case FEATURE_TREE:
00426             n_tasks = 1;
00427             n_blocks = options.n_feature_blocks;
00428         break;
00429         case PLAIN:
00430         case FUSED:
00431             n_tasks = 1;
00432             n_blocks = 1;
00433         break;
00434     }
00435     SG_SDEBUG("n_tasks = %d, n_blocks = %d\n",n_tasks,n_blocks);
00436     SG_SDEBUG("n_nodes = %d\n",options.n_nodes);
00437 
00438     int iter = 1;
00439     bool done = false;
00440     bool gradient_break = false;
00441 
00442     double rsL2 = options.rsL2;
00443 
00444     double* ATx = SG_CALLOC(double, n_feats*n_tasks);
00445     if (options.regularization!=0)
00446     {
00447         lambda = compute_lambda(ATx, z, features, y, n_vecs, n_feats, n_blocks, options);
00448         rsL2*= lambda;
00449     }
00450     else 
00451         lambda = z;
00452 
00453     double lambda2 = 0.0;
00454 
00455     SGMatrix<double> w(n_feats,n_tasks);
00456     w.zero();
00457     SGVector<double> c(n_tasks);
00458     c.zero();
00459     
00460     if (options.last_result)
00461     {
00462         w = options.last_result->w;
00463         c = options.last_result->c;
00464     }
00465 
00466     double* s = SG_CALLOC(double, n_feats*n_tasks);
00467     double* sc = SG_CALLOC(double, n_tasks);
00468     double* g = SG_CALLOC(double, n_feats*n_tasks);
00469     double* v = SG_CALLOC(double, n_feats*n_tasks);
00470     double* z_flsa = SG_CALLOC(double, n_feats);
00471     double* z0_flsa = SG_CALLOC(double, n_feats);
00472 
00473     double* Aw = SG_CALLOC(double, n_vecs);
00474     switch (options.mode)
00475     {
00476         case MULTITASK_GROUP:
00477         case MULTITASK_TREE:
00478         {
00479             for (t=0; t<n_blocks; t++)
00480             {
00481                 SGVector<index_t> task_idx = options.tasks_indices[t];
00482                 //task_idx.display_vector("task");
00483                 int n_vecs_task = task_idx.vlen;
00484                 for (i=0; i<n_vecs_task; i++)
00485                     Aw[task_idx[i]] = features->dense_dot(task_idx[i],w.matrix+t*n_feats,n_feats);
00486             }
00487         }
00488         break;
00489         case FEATURE_GROUP:
00490         case FEATURE_TREE:
00491         case PLAIN:
00492         case FUSED:
00493         {
00494             for (i=0; i<n_vecs; i++)
00495                 Aw[i] = features->dense_dot(i,w.matrix,n_feats);
00496         }
00497         break;
00498     }
00499 
00500     double* Av = SG_MALLOC(double, n_vecs);
00501     double* As = SG_MALLOC(double, n_vecs);
00502 
00503     double L = 1.0/n_vecs;
00504 
00505     if (options.mode==FUSED)
00506         L += rsL2;
00507 
00508     double* wp = SG_CALLOC(double, n_feats*n_tasks);
00509     for (i=0; i<n_feats*n_tasks; i++) 
00510         wp[i] = w[i];
00511     double* Awp = SG_MALLOC(double, n_vecs);
00512     for (i=0; i<n_vecs; i++) 
00513         Awp[i] = Aw[i];
00514     double* wwp = SG_CALLOC(double, n_feats*n_tasks);
00515 
00516     double* cp = SG_MALLOC(double, n_tasks);
00517     for (t=0; t<n_tasks; t++) 
00518         cp[t] = c[t];
00519     double* ccp = SG_CALLOC(double, n_tasks);
00520 
00521     double* gc = SG_MALLOC(double, n_tasks);
00522     double alphap = 0.0, alpha = 1.0;
00523     double fun_x = 0.0;
00524     
00525     while (!done && iter <= options.max_iter && !CSignal::cancel_computations()) 
00526     {
00527         beta = (alphap-1.0)/alpha;
00528 
00529         for (i=0; i<n_feats*n_tasks; i++)
00530             s[i] = w[i] + beta*wwp[i];
00531         for (t=0; t<n_tasks; t++)
00532             sc[t] = c[t] + beta*ccp[t];
00533         for (i=0; i<n_vecs; i++)
00534             As[i] = Aw[i] + beta*(Aw[i]-Awp[i]);
00535         for (i=0; i<n_tasks*n_feats; i++) 
00536             g[i] = 0.0;
00537 
00538         double fun_s = search_point_gradient_and_objective(features, ATx, As, sc, y, n_vecs, n_feats, n_tasks, g, gc, options);
00539 
00540         //SG_SDEBUG("fun_s = %f\n", fun_s);
00541 
00542         if (options.mode==PLAIN || options.mode==FUSED)
00543             fun_s += rsL2/2 * SGVector<float64_t>::dot(w.matrix,w.matrix,n_feats);
00544 
00545         for (i=0; i<n_feats*n_tasks; i++)
00546             wp[i] = w[i];
00547         for (t=0; t<n_tasks; t++)
00548             cp[t] = c[t];
00549         for (i=0; i<n_vecs; i++) 
00550             Awp[i] = Aw[i];
00551 
00552         int inner_iter = 1;
00553         while (inner_iter <= 1000)
00554         {
00555             for (i=0; i<n_feats*n_tasks; i++)
00556                 v[i] = s[i] - g[i]*(1.0/L);
00557 
00558             for (t=0; t<n_tasks; t++)
00559                 c[t] = sc[t] - gc[t]*(1.0/L);
00560             
00561             projection(w.matrix,v,n_feats,n_blocks,lambda,lambda2,L,z_flsa,z0_flsa,options);
00562 
00563             for (i=0; i<n_feats*n_tasks; i++)
00564                 v[i] = w[i] - s[i];
00565 
00566             fun_x = 0.0;
00567             switch (options.mode)
00568             {
00569                 case MULTITASK_GROUP:
00570                 case MULTITASK_TREE:
00571                     for (t=0; t<n_blocks; t++)
00572                     {
00573                         SGVector<index_t> task_idx = options.tasks_indices[t];
00574                         int n_vecs_task = task_idx.vlen;
00575                         for (i=0; i<n_vecs_task; i++)
00576                         {
00577                             Aw[task_idx[i]] = features->dense_dot(task_idx[i],w.matrix+t*n_feats,n_feats);
00578                             if (options.loss==LOGISTIC)
00579                             {
00580                                 double aa = -y[task_idx[i]]*(Aw[task_idx[i]]+c[t]);
00581                                 double bb = CMath::max(aa,0.0);
00582                                 fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);
00583                             }
00584                         }
00585                     }
00586                 break;
00587                 case FEATURE_GROUP:
00588                 case FEATURE_TREE:
00589                 case PLAIN:
00590                 case FUSED:
00591                     for (i=0; i<n_vecs; i++)
00592                     {
00593                         Aw[i] = features->dense_dot(i, w.matrix, n_feats);
00594                         if (options.loss==LOGISTIC)
00595                         {
00596                             double aa = -y[i]*(Aw[i]+c[0]);
00597                             double bb = CMath::max(aa,0.0);
00598                             if (y[i]>0)
00599                                 fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*pos_weight;
00600                             else
00601                                 fun_x += (CMath::log(CMath::exp(-bb) + CMath::exp(aa-bb)) + bb);//*neg_weight;
00602                         }
00603                     }
00604                 break;
00605             }
00606             if (options.loss==LOGISTIC)
00607                 fun_x /= n_vecs;
00608             if (options.mode==PLAIN || options.mode==FUSED)
00609                 fun_x += rsL2/2 * SGVector<float64_t>::dot(w.matrix,w.matrix,n_feats);
00610             
00611             double l_sum = 0.0, r_sum = 0.0;
00612             switch (options.loss)
00613             {
00614                 case LOGISTIC:
00615                     r_sum = SGVector<float64_t>::dot(v,v,n_feats*n_tasks);
00616                     l_sum = fun_x - fun_s - SGVector<float64_t>::dot(v,g,n_feats*n_tasks);
00617                     for (t=0; t<n_tasks; t++)
00618                     {
00619                         r_sum += CMath::sq(c[t] - sc[t]);
00620                         l_sum -= (c[t] - sc[t])*gc[t];
00621                     }
00622                     r_sum /= 2.0;
00623                 break;
00624                 case LEAST_SQUARES:
00625                     r_sum = SGVector<float64_t>::dot(v,v,n_feats*n_tasks);
00626                     for (i=0; i<n_vecs; i++)
00627                         l_sum += CMath::sq(Aw[i]-As[i]);
00628                 break;
00629             }
00630 
00631             if (r_sum <= 1e-20)
00632             {
00633                 gradient_break = true;
00634                 break;
00635             }
00636 
00637             if (l_sum <= r_sum*L)
00638                 break;
00639             else 
00640                 L = CMath::max(2*L, l_sum/r_sum);
00641             inner_iter++;
00642         }
00643 
00644         alphap = alpha;
00645         alpha = 0.5*(1+CMath::sqrt(4*alpha*alpha+1));
00646         for (i=0; i<n_feats*n_tasks; i++)
00647             wwp[i] = w[i] - wp[i];
00648         for (t=0; t<n_tasks; t++)
00649             ccp[t] = c[t] - cp[t];
00650         double regularizer = compute_regularizer(w.matrix, lambda, lambda2, n_vecs, n_feats, n_blocks, options);
00651         funcp = func;
00652 
00653         if (options.loss==LOGISTIC)
00654         {
00655             func = fun_x + regularizer;
00656         }
00657         if (options.loss==LEAST_SQUARES)
00658         {
00659             func = regularizer;
00660             for (i=0; i<n_vecs; i++)
00661                 func += CMath::sq(Aw[i] - y[i]);
00662         }
00663         SG_SDEBUG("Obj = %f + %f = %f \n",fun_x, regularizer, func);
00664 
00665         if (gradient_break)
00666         {
00667             SG_SINFO("Gradient norm is less than 1e-20\n");
00668             break;
00669         }
00670 
00671         double norm_wp, norm_wwp;
00672         double step;
00673         switch (options.termination)
00674         {
00675             case 0:
00676                 if (iter>=2)
00677                 {
00678                     step = CMath::abs(func-funcp);
00679                     if (step <= options.tolerance)
00680                     {
00681                         SG_SINFO("Objective changes less than tolerance\n");
00682                         done = true;
00683                     }
00684                 }
00685             break;
00686             case 1:
00687                 if (iter>=2)
00688                 {
00689                     step = CMath::abs(func-funcp);
00690                     if (step <= step*options.tolerance)
00691                     {
00692                         SG_SINFO("Objective changes relatively less than tolerance\n");
00693                         done = true;
00694                     }
00695                 }
00696             break;
00697             case 2:
00698                 if (func <= options.tolerance)
00699                 {
00700                     SG_SINFO("Objective is less than tolerance\n");
00701                     done = true;
00702                 }
00703             break;
00704             case 3:
00705                 norm_wwp = CMath::sqrt(SGVector<float64_t>::dot(wwp,wwp,n_feats*n_tasks));
00706                 if (norm_wwp <= options.tolerance)
00707                     done = true;
00708             break;
00709             case 4:
00710                 norm_wp = CMath::sqrt(SGVector<float64_t>::dot(wp,wp,n_feats*n_tasks));
00711                 norm_wwp = CMath::sqrt(SGVector<float64_t>::dot(wwp,wwp,n_feats*n_tasks));
00712                 if (norm_wwp <= options.tolerance*CMath::max(norm_wp,1.0))
00713                     done = true;
00714             break;
00715             default: 
00716                 done = true;
00717         }
00718 
00719         iter++;
00720     }
00721     SG_SINFO("Finished %d iterations, objective = %f\n", iter, func);
00722 
00723     SG_FREE(ATx);
00724     SG_FREE(wp);
00725     SG_FREE(wwp);
00726     SG_FREE(s);
00727     SG_FREE(sc);
00728     SG_FREE(cp);
00729     SG_FREE(ccp);
00730     SG_FREE(g);
00731     SG_FREE(v);
00732     SG_FREE(Aw);
00733     SG_FREE(Awp);
00734     SG_FREE(Av);
00735     SG_FREE(As);
00736     SG_FREE(gc);
00737     SG_FREE(z_flsa);
00738     SG_FREE(z0_flsa);
00739 
00740     return slep_result_t(w,c);
00741 };
00742 };
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation