00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #include <vector>
00014
00015 #include <shogun/lib/config.h>
00016
00017 #ifdef HAVE_LAPACK
00018 #include <shogun/io/SGIO.h>
00019 #include <shogun/lib/Signal.h>
00020 #include <shogun/lib/Time.h>
00021 #include <shogun/base/Parameter.h>
00022 #include <shogun/transfer/multitask/LibLinearMTL.h>
00023 #include <shogun/optimization/liblinear/tron.h>
00024 #include <shogun/features/DotFeatures.h>
00025
00026 using namespace shogun;
00027
00028
00029 CLibLinearMTL::CLibLinearMTL()
00030 : CLinearMachine()
00031 {
00032 init();
00033 }
00034
00035 CLibLinearMTL::CLibLinearMTL(
00036 float64_t C, CDotFeatures* traindat, CLabels* trainlab)
00037 : CLinearMachine()
00038 {
00039 init();
00040 C1=C;
00041 C2=C;
00042 use_bias=true;
00043
00044 set_features(traindat);
00045 set_labels(trainlab);
00046
00047 }
00048
00049
00050 void CLibLinearMTL::init()
00051 {
00052 use_bias=false;
00053 C1=1;
00054 C2=1;
00055 set_max_iterations();
00056 epsilon=1e-5;
00057
00058 SG_ADD(&C1, "C1", "C Cost constant 1.", MS_AVAILABLE);
00059 SG_ADD(&C2, "C2", "C Cost constant 2.", MS_AVAILABLE);
00060 SG_ADD(&use_bias, "use_bias", "Indicates if bias is used.",
00061 MS_NOT_AVAILABLE);
00062 SG_ADD(&epsilon, "epsilon", "Convergence precision.", MS_NOT_AVAILABLE);
00063 SG_ADD(&max_iterations, "max_iterations", "Max number of iterations.",
00064 MS_NOT_AVAILABLE);
00065
00066 }
00067
00068 CLibLinearMTL::~CLibLinearMTL()
00069 {
00070 }
00071
00072 bool CLibLinearMTL::train_machine(CFeatures* data)
00073 {
00074 CSignal::clear_cancel();
00075 ASSERT(m_labels);
00076
00077 if (data)
00078 {
00079 if (!data->has_property(FP_DOT))
00080 SG_ERROR("Specified features are not of type CDotFeatures\n");
00081
00082 set_features((CDotFeatures*) data);
00083 }
00084 ASSERT(features);
00085 m_labels->ensure_valid();
00086
00087
00088 int32_t num_train_labels=m_labels->get_num_labels();
00089 int32_t num_feat=features->get_dim_feature_space();
00090 int32_t num_vec=features->get_num_vectors();
00091
00092 if (num_vec!=num_train_labels)
00093 {
00094 SG_ERROR("number of vectors %d does not match "
00095 "number of training labels %d\n",
00096 num_vec, num_train_labels);
00097 }
00098
00099
00100 float64_t* training_w = NULL;
00101 if (use_bias)
00102 training_w=SG_MALLOC(float64_t, num_feat+1);
00103 else
00104 training_w=SG_MALLOC(float64_t, num_feat+0);
00105
00106 problem prob;
00107 if (use_bias)
00108 {
00109 prob.n=num_feat+1;
00110 memset(training_w, 0, sizeof(float64_t)*(num_feat+1));
00111 }
00112 else
00113 {
00114 prob.n=num_feat;
00115 memset(training_w, 0, sizeof(float64_t)*(num_feat+0));
00116 }
00117 prob.l=num_vec;
00118 prob.x=features;
00119 prob.y=SG_MALLOC(float64_t, prob.l);
00120 prob.use_bias=use_bias;
00121
00122 for (int32_t i=0; i<prob.l; i++)
00123 prob.y[i]=((CBinaryLabels*)m_labels)->get_label(i);
00124
00125 int pos = 0;
00126 int neg = 0;
00127 for(int i=0;i<prob.l;i++)
00128 {
00129 if(prob.y[i]==+1)
00130 pos++;
00131 }
00132 neg = prob.l - pos;
00133
00134 SG_INFO("%d training points %d dims\n", prob.l, prob.n);
00135 SG_INFO("%d positives, %d negatives\n", pos, neg);
00136
00137 double Cp=C1;
00138 double Cn=C2;
00139 solve_l2r_l1l2_svc(&prob, epsilon, Cp, Cn);
00140
00141 if (use_bias)
00142 set_bias(training_w[num_feat]);
00143 else
00144 set_bias(0);
00145
00146 SG_FREE(prob.y);
00147
00148 w = SGVector<float64_t>(num_feat);
00149 for (int32_t i=0; i<num_feat; i++)
00150 w[i] = training_w[i];
00151
00152 return true;
00153 }
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166
00167
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178
00179 #undef GETI
00180 #define GETI(i) (y[i]+1)
00181
00182
00183
00184 void CLibLinearMTL::solve_l2r_l1l2_svc(const problem *prob, double eps, double Cp, double Cn)
00185 {
00186
00187
00188
00189 int l = prob->l;
00190 int w_size = prob->n;
00191 int i, s, iter = 0;
00192 double C, d, G;
00193 double *QD = SG_MALLOC(double, l);
00194 int *index = SG_MALLOC(int, l);
00195
00196
00197 int32_t *y = SG_MALLOC(int32_t, l);
00198 int active_size = l;
00199
00200 double PG;
00201 double PGmax_old = CMath::INFTY;
00202 double PGmin_old = -CMath::INFTY;
00203 double PGmax_new, PGmin_new;
00204
00205
00206 V = SGMatrix<float64_t>(w_size,num_tasks);
00207
00208
00209 alphas = SGVector<float64_t>(l);
00210
00211
00212
00213 double diag[3] = {0.5/Cn, 0, 0.5/Cp};
00214 double upper_bound[3] = {CMath::INFTY, 0, CMath::INFTY};
00215 if(true)
00216 {
00217 diag[0] = 0;
00218 diag[2] = 0;
00219 upper_bound[0] = Cn;
00220 upper_bound[2] = Cp;
00221 }
00222
00223 int n = prob->n;
00224
00225 if (prob->use_bias)
00226 n--;
00227
00228
00229 for(int32_t k=0; k<w_size*num_tasks; k++)
00230 {
00231 V.matrix[k] = 0;
00232 }
00233
00234
00235 for(i=0; i<l; i++)
00236 {
00237 alphas[i] = 0;
00238 }
00239
00240 for(i=0; i<l; i++)
00241 {
00242 if(prob->y[i] > 0)
00243 {
00244 y[i] = +1;
00245 }
00246 else
00247 {
00248 y[i] = -1;
00249 }
00250 QD[i] = diag[GETI(i)];
00251 QD[i] += prob->x->dot(i, prob->x,i);
00252 index[i] = i;
00253 }
00254
00255 CTime start_time;
00256 while (iter < max_iterations && !CSignal::cancel_computations())
00257 {
00258 if (m_max_train_time > 0 && start_time.cur_time_diff() > m_max_train_time)
00259 break;
00260
00261 PGmax_new = -CMath::INFTY;
00262 PGmin_new = CMath::INFTY;
00263
00264 for (i=0; i<active_size; i++)
00265 {
00266 int j = i+rand()%(active_size-i);
00267 CMath::swap(index[i], index[j]);
00268 }
00269
00270 for (s=0;s<active_size;s++)
00271 {
00272 i = index[s];
00273 int32_t yi = y[i];
00274 int32_t ti = task_indicator_lhs[i];
00275 C = upper_bound[GETI(i)];
00276
00277
00278
00279 typedef std::map<index_t, float64_t>::const_iterator map_iter;
00280
00281 float64_t inner_sum = 0;
00282 for (map_iter it=task_similarity_matrix.data[ti].begin(); it!=task_similarity_matrix.data[ti].end(); it++)
00283 {
00284
00285
00286 int32_t e_i = it->first;
00287 float64_t sim = it->second;
00288
00289
00290 float64_t* tmp_w = V.get_column_vector(e_i);
00291 inner_sum += sim * yi * prob->x->dense_dot(i, tmp_w, n);
00292
00293
00294
00295
00296 }
00297
00298
00299 G = inner_sum-1.0;
00300
00301
00302 PG = 0;
00303 if (alphas[i] == 0)
00304 {
00305 if (G > PGmax_old)
00306 {
00307 active_size--;
00308 CMath::swap(index[s], index[active_size]);
00309 s--;
00310 continue;
00311 }
00312 else if (G < 0)
00313 PG = G;
00314 }
00315 else if (alphas[i] == C)
00316 {
00317 if (G < PGmin_old)
00318 {
00319 active_size--;
00320 CMath::swap(index[s], index[active_size]);
00321 s--;
00322 continue;
00323 }
00324 else if (G > 0)
00325 PG = G;
00326 }
00327 else
00328 PG = G;
00329
00330 PGmax_new = CMath::max(PGmax_new, PG);
00331 PGmin_new = CMath::min(PGmin_new, PG);
00332
00333 if(fabs(PG) > 1.0e-12)
00334 {
00335
00336 double alpha_old = alphas[i];
00337
00338
00339 alphas[i] = CMath::min(CMath::max(alphas[i] - G/QD[i], 0.0), C);
00340 d = (alphas[i] - alpha_old)*yi;
00341
00342
00343 float64_t* tmp_w = V.get_column_vector(ti);
00344 prob->x->add_to_dense_vec(d, i, tmp_w, n);
00345
00346
00347
00348
00349 }
00350 }
00351
00352 iter++;
00353 float64_t gap=PGmax_new - PGmin_new;
00354 SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
00355
00356 if(gap <= eps)
00357 {
00358 if(active_size == l)
00359 break;
00360 else
00361 {
00362 active_size = l;
00363 PGmax_old = CMath::INFTY;
00364 PGmin_old = -CMath::INFTY;
00365 continue;
00366 }
00367 }
00368 PGmax_old = PGmax_new;
00369 PGmin_old = PGmin_new;
00370 if (PGmax_old <= 0)
00371 PGmax_old = CMath::INFTY;
00372 if (PGmin_old >= 0)
00373 PGmin_old = -CMath::INFTY;
00374 }
00375
00376 SG_DONE();
00377 SG_INFO("optimization finished, #iter = %d\n",iter);
00378 if (iter >= max_iterations)
00379 {
00380 SG_WARNING("reaching max number of iterations\nUsing -s 2 may be faster"
00381 "(also see liblinear FAQ)\n\n");
00382 }
00383
00384
00385
00386 delete [] QD;
00387
00388 delete [] y;
00389 delete [] index;
00390 }
00391
00392
00393 float64_t CLibLinearMTL::compute_primal_obj()
00394 {
00395
00396
00397
00398
00399
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435 SG_INFO("DONE to compute Primal OBJ\n");
00436
00437 SGMatrix<float64_t> W = get_W();
00438
00439 float64_t obj = 0;
00440 int32_t num_vec = features->get_num_vectors();
00441 int32_t w_size = features->get_dim_feature_space();
00442
00443
00444 for (int32_t t=0; t<num_tasks; t++)
00445 {
00446 float64_t* w_t = W.get_column_vector(t);
00447
00448 for(int32_t i=0; i<w_size; i++)
00449 {
00450 obj += 0.5 * w_t[i]*w_t[i];
00451 }
00452 }
00453
00454
00455 for (int32_t s=0; s<num_tasks; s++)
00456 {
00457 float64_t* w_s = W.get_column_vector(s);
00458 for (int32_t t=0; t<num_tasks; t++)
00459 {
00460 float64_t* w_t = W.get_column_vector(t);
00461 float64_t l = graph_laplacian.matrix[s*num_tasks+t];
00462
00463 for(int32_t i=0; i<w_size; i++)
00464 {
00465 obj += 0.5 * l * w_s[i]*w_t[i];
00466 }
00467 }
00468 }
00469
00470
00471 for(int32_t i=0; i<num_vec; i++)
00472 {
00473 int32_t ti = task_indicator_lhs[i];
00474 float64_t* w_t = W.get_column_vector(ti);
00475 float64_t residual = ((CBinaryLabels*)m_labels)->get_label(i) * features->dense_dot(i, w_t, w_size);
00476
00477
00478 obj += C1 * CMath::max(0.0, 1 - residual);
00479
00480 }
00481
00482 SG_INFO("DONE to compute Primal OBJ, obj=%f\n",obj);
00483
00484 return obj;
00485 }
00486
00487 float64_t CLibLinearMTL::compute_dual_obj()
00488 {
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 SG_INFO("starting to compute DUAL OBJ\n");
00505
00506 int32_t num_vec=features->get_num_vectors();
00507
00508 float64_t obj = 0;
00509
00510
00511 for(int32_t i=0; i<num_vec; i++)
00512 {
00513 obj += alphas[i];
00514 }
00515
00516
00517
00518 int32_t v_size = features->get_dim_feature_space();
00519
00520
00521 for (int32_t s=0; s<num_tasks; s++)
00522 {
00523 float64_t* v_s = V.get_column_vector(s);
00524 for (int32_t t=0; t<num_tasks; t++)
00525 {
00526 float64_t* v_t = V.get_column_vector(t);
00527 const float64_t ts = task_similarity_matrix(s, t);
00528
00529 for(int32_t i=0; i<v_size; i++)
00530 {
00531 obj -= 0.5 * ts * v_s[i]*v_t[i];
00532 }
00533 }
00534 }
00535
00536
00537
00538
00539
00540
00541
00542
00543
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553
00554
00555
00556
00557
00558 return obj;
00559 }
00560
00561
00562 float64_t CLibLinearMTL::compute_duality_gap()
00563 {
00564 return 0.0;
00565 }
00566
00567
00568 #endif //HAVE_LAPACK