00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <ctype.h>
00024 #include <algorithm>
00025
00026 #include <shogun/io/SGIO.h>
00027 #include <shogun/mathematics/Math.h>
00028 #include <shogun/features/SparseFeatures.h>
00029 #include <shogun/lib/external/ssl.h>
00030
00031 namespace shogun
00032 {
00033 void ssl_train(struct data *Data,
00034 struct options *Options,
00035 struct vector_double *Weights,
00036 struct vector_double *Outputs)
00037 {
00038
00039 initialize(Weights,Data->n,0.0);
00040 initialize(Outputs,Data->m,0.0);
00041 vector_int *Subset = SG_MALLOC(vector_int, 1);
00042 initialize(Subset,Data->m);
00043
00044 int32_t optimality = 0;
00045 switch(Options->algo)
00046 {
00047 case -1:
00048 SG_SINFO("Regularized Least Squares Regression (CGLS)\n");
00049 optimality=CGLS(Data,Options,Subset,Weights,Outputs);
00050 break;
00051 case RLS:
00052 SG_SINFO("Regularized Least Squares Classification (CGLS)\n");
00053 optimality=CGLS(Data,Options,Subset,Weights,Outputs);
00054 break;
00055 case SVM:
00056 SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n");
00057 optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0);
00058 break;
00059 case TSVM:
00060 SG_SINFO("Transductive L2-SVM (TSVM)\n");
00061 optimality=TSVM_MFN(Data,Options,Weights,Outputs);
00062 break;
00063 case DA_SVM:
00064 SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n");
00065 optimality=DA_S3VM(Data,Options,Weights,Outputs);
00066 break;
00067 default:
00068 SG_SERROR("Algorithm unspecified\n");
00069 }
00070
00071 if (!optimality)
00072 SG_SWARNING("SSL-Algorithm terminated without reaching optimum.\n");
00073
00074 SG_FREE(Subset->vec);
00075 SG_FREE(Subset);
00076 return;
00077 }
00078
/* Conjugate-gradient solver (CGLS) for the regularized least-squares
 * subproblem restricted to the examples listed in Subset:
 * minimize 0.5*lambda*||beta||^2 + 0.5*sum_{i in Subset} C[i]*(Y[i]-o[i])^2.
 * Weights (beta) and Outputs (o) are updated in place; the last weight
 * component (index n-1) is the bias, fed Options->bias per example.
 * Returns 1 if the residual tolerance was met within cgitermax iterations,
 * 0 otherwise. */
int32_t CGLS(
const struct data *Data, const struct options *Options,
const struct vector_int *Subset, struct vector_double *Weights,
struct vector_double *Outputs)
{
SG_SDEBUG("CGLS starting...");

/* Unpack problem data; J indexes the active examples. */
int32_t active = Subset->d;
int32_t *J = Subset->vec;
CDotFeatures* features=Data->features;
float64_t *Y = Data->Y;
float64_t *C = Data->C;
int32_t n = Data->n;
float64_t lambda = Options->lambda;
int32_t cgitermax = Options->cgitermax;
float64_t epsilon = Options->epsilon;
float64_t *beta = Weights->vec;
float64_t *o = Outputs->vec;

/* z = weighted residual on the active set; q later holds X*p. */
float64_t *z = SG_MALLOC(float64_t, active);
float64_t *q = SG_MALLOC(float64_t, active);
int32_t ii=0;
for (int32_t i = active ; i-- ;){
ii=J[i];
z[i] = C[ii]*(Y[ii] - o[ii]);
}
/* r = X'*z - lambda*beta: residual in weight space (bias in slot n-1). */
float64_t *r = SG_MALLOC(float64_t, n);
for (int32_t i = n ; i-- ;)
r[i] = 0.0;
for (register int32_t j=0; j < active; j++)
{
features->add_to_dense_vec(z[j], J[j], r, n-1);
r[n-1]+=Options->bias*z[j];
}
/* p = initial search direction (= r); omega1 = ||r||^2. */
float64_t *p = SG_MALLOC(float64_t, n);
float64_t omega1 = 0.0;
for (int32_t i = n ; i-- ;)
{
r[i] -= lambda*beta[i];
p[i] = r[i];
omega1 += r[i]*r[i];
}
float64_t omega_p = omega1;
float64_t omega_q = 0.0;
float64_t inv_omega2 = 1/omega1;
float64_t scale = 0.0;
float64_t omega_z=0.0;
float64_t gamma = 0.0;
int32_t cgiter = 0;
int32_t optimality = 0;
float64_t epsilon2 = epsilon*epsilon;
/* Standard CG iteration on the normal equations. */
while(cgiter < cgitermax)
{
cgiter++;
omega_q=0.0;
float64_t t=0.0;
register int32_t i,j;
/* q = X*p (with bias); omega_q = p'X'CXp. */
for (i=0; i < active; i++)
{
ii=J[i];
t=features->dense_dot(ii, p, n-1);
t+=Options->bias*p[n-1];
q[i]=t;
omega_q += C[ii]*t*t;
}
/* Step length along direction p. */
gamma = omega1/(lambda*omega_p + omega_q);
inv_omega2 = 1/omega1;
for (i = n ; i-- ;)
{
r[i] = 0.0;
beta[i] += gamma*p[i];
}
/* Update outputs and active-set residual z; omega_z = ||z||^2. */
omega_z=0.0;
for (i = active ; i-- ;)
{
ii=J[i];
o[ii] += gamma*q[i];
z[i] -= gamma*C[ii]*q[i];
omega_z+=z[i]*z[i];
}
/* Recompute r = X'*z - lambda*beta from scratch each iteration. */
for (j=0; j < active; j++)
{
t=z[j];

features->add_to_dense_vec(t, J[j], r, n-1);
r[n-1]+=Options->bias*t;
}
omega1 = 0.0;
for (i = n ; i-- ;)
{
r[i] -= lambda*beta[i];
omega1 += r[i]*r[i];
}
/* Relative stopping test: ||r||^2 < eps^2 * ||z||^2. */
if(omega1 < epsilon2*omega_z)
{
optimality=1;
break;
}
/* Fletcher-Reeves style direction update: p = r + (omega1/omega1_old)*p. */
omega_p=0.0;
scale=omega1*inv_omega2;
for(i = n ; i-- ;)
{
p[i] = r[i] + p[i]*scale;
omega_p += p[i]*p[i];
}
}
SG_SDEBUG("...Done.");
SG_SINFO("CGLS converged in %d iteration(s)", cgiter);

SG_FREE(z);
SG_FREE(q);
SG_FREE(r);
SG_FREE(p);
return optimality;
}
00197
00198 int32_t L2_SVM_MFN(
00199 const struct data *Data, struct options *Options,
00200 struct vector_double *Weights, struct vector_double *Outputs,
00201 int32_t ini)
00202 {
00203
00204 CDotFeatures* features=Data->features;
00205 float64_t *Y = Data->Y;
00206 float64_t *C = Data->C;
00207 int32_t n = Data->n;
00208 int32_t m = Data->m;
00209 float64_t lambda = Options->lambda;
00210 float64_t epsilon;
00211 float64_t *w = Weights->vec;
00212 float64_t *o = Outputs->vec;
00213 float64_t F_old = 0.0;
00214 float64_t F = 0.0;
00215 float64_t diff=0.0;
00216 vector_int *ActiveSubset = SG_MALLOC(vector_int, 1);
00217 ActiveSubset->vec = SG_MALLOC(int32_t, m);
00218 ActiveSubset->d = m;
00219
00220 if(ini==0) {
00221 epsilon=BIG_EPSILON;
00222 Options->cgitermax=SMALL_CGITERMAX;
00223 Options->epsilon=BIG_EPSILON;
00224 }
00225 else {epsilon = Options->epsilon;}
00226 for (int32_t i=0;i<n;i++) F+=w[i]*w[i];
00227 F=0.5*lambda*F;
00228 int32_t active=0;
00229 int32_t inactive=m-1;
00230 for (int32_t i=0; i<m ; i++)
00231 {
00232 diff=1-Y[i]*o[i];
00233 if(diff>0)
00234 {
00235 ActiveSubset->vec[active]=i;
00236 active++;
00237 F+=0.5*C[i]*diff*diff;
00238 }
00239 else
00240 {
00241 ActiveSubset->vec[inactive]=i;
00242 inactive--;
00243 }
00244 }
00245 ActiveSubset->d=active;
00246 int32_t iter=0;
00247 int32_t opt=0;
00248 int32_t opt2=0;
00249 vector_double *Weights_bar = SG_MALLOC(vector_double, 1);
00250 vector_double *Outputs_bar = SG_MALLOC(vector_double, 1);
00251 float64_t *w_bar = SG_MALLOC(float64_t, n);
00252 float64_t *o_bar = SG_MALLOC(float64_t, m);
00253 Weights_bar->vec=w_bar;
00254 Outputs_bar->vec=o_bar;
00255 Weights_bar->d=n;
00256 Outputs_bar->d=m;
00257 float64_t delta=0.0;
00258 float64_t t=0.0;
00259 int32_t ii = 0;
00260 while(iter<MFNITERMAX)
00261 {
00262 iter++;
00263 SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F);
00264 for (int32_t i=n; i-- ;)
00265 w_bar[i]=w[i];
00266 for (int32_t i=m; i-- ;)
00267 o_bar[i]=o[i];
00268
00269 opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
00270 for(register int32_t i=active; i < m; i++)
00271 {
00272 ii=ActiveSubset->vec[i];
00273
00274 t=features->dense_dot(ii, w_bar, n-1);
00275 t+=Options->bias*w_bar[n-1];
00276
00277 o_bar[ii]=t;
00278 }
00279 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
00280 opt2=1;
00281 for (int32_t i=0;i<m;i++)
00282 {
00283 ii=ActiveSubset->vec[i];
00284 if(i<active)
00285 opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+epsilon));
00286 else
00287 opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-epsilon));
00288 if(opt2==0) break;
00289 }
00290 if(opt && opt2)
00291 {
00292 if(epsilon==BIG_EPSILON)
00293 {
00294 epsilon=EPSILON;
00295 Options->epsilon=EPSILON;
00296 SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON , EPSILON);
00297 continue;
00298 }
00299 else
00300 {
00301 for (int32_t i=n; i-- ;)
00302 w[i]=w_bar[i];
00303 for (int32_t i=m; i-- ;)
00304 o[i]=o_bar[i];
00305 SG_FREE(ActiveSubset->vec);
00306 SG_FREE(ActiveSubset);
00307 SG_FREE(o_bar);
00308 SG_FREE(w_bar);
00309 SG_FREE(Weights_bar);
00310 SG_FREE(Outputs_bar);
00311 SG_SINFO("L2_SVM_MFN converged (optimality) in %d", iter);
00312 return 1;
00313 }
00314 }
00315 delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m);
00316 SG_SDEBUG("LINE_SEARCH delta = %f\n", delta);
00317 F_old=F;
00318 F=0.0;
00319 for (int32_t i=n; i-- ;) {
00320 w[i]+=delta*(w_bar[i]-w[i]);
00321 F+=w[i]*w[i];
00322 }
00323 F=0.5*lambda*F;
00324 active=0;
00325 inactive=m-1;
00326 for (int32_t i=0; i<m ; i++)
00327 {
00328 o[i]+=delta*(o_bar[i]-o[i]);
00329 diff=1-Y[i]*o[i];
00330 if(diff>0)
00331 {
00332 ActiveSubset->vec[active]=i;
00333 active++;
00334 F+=0.5*C[i]*diff*diff;
00335 }
00336 else
00337 {
00338 ActiveSubset->vec[inactive]=i;
00339 inactive--;
00340 }
00341 }
00342 ActiveSubset->d=active;
00343 if(CMath::abs(F-F_old)<RELATIVE_STOP_EPS*CMath::abs(F_old))
00344 {
00345 SG_SINFO("L2_SVM_MFN converged (rel. criterion) in %d iterations", iter);
00346 return 2;
00347 }
00348 }
00349 SG_FREE(ActiveSubset->vec);
00350 SG_FREE(ActiveSubset);
00351 SG_FREE(o_bar);
00352 SG_FREE(w_bar);
00353 SG_FREE(Weights_bar);
00354 SG_FREE(Outputs_bar);
00355 SG_SINFO("L2_SVM_MFN converged (max iter exceeded) in %d iterations", iter);
00356 return 0;
00357 }
00358
/* Exact line search along the Newton direction w + delta*(w_bar - w).
 * The objective is piecewise quadratic in delta; L and R hold the
 * derivative contributions at the current breakpoint interval evaluated
 * at delta=0 and delta=1. Breakpoints occur where an example crosses the
 * margin (enters/leaves the active hinge set).
 * d = weight dimension, l = number of examples.
 * Returns the minimizing step delta. */
float64_t line_search(float64_t *w,
float64_t *w_bar,
float64_t lambda,
float64_t *o,
float64_t *o_bar,
float64_t *Y,
float64_t *C,
int32_t d,
int32_t l)
{
/* Regularizer part of the directional derivative at delta=0 / delta=1. */
float64_t omegaL = 0.0;
float64_t omegaR = 0.0;
float64_t diff=0.0;
for(int32_t i=d; i--; )
{
diff=w_bar[i]-w[i];
omegaL+=w[i]*diff;
omegaR+=w_bar[i]*diff;
}
omegaL=lambda*omegaL;
omegaR=lambda*omegaR;
/* Loss part: only examples currently violating the margin contribute. */
float64_t L=0.0;
float64_t R=0.0;
int32_t ii=0;
for (int32_t i=0;i<l;i++)
{
if(Y[i]*o[i]<1)
{
diff=C[i]*(o_bar[i]-o[i]);
L+=(o[i]-Y[i])*diff;
R+=(o_bar[i]-Y[i])*diff;
}
}
L+=omegaL;
R+=omegaR;
/* Collect breakpoints where an example crosses the margin along the
 * segment: s=-1 marks leaving the active set, s=+1 entering it. */
Delta* deltas=SG_MALLOC(Delta, l);
int32_t p=0;
for(int32_t i=0;i<l;i++)
{
diff=Y[i]*(o_bar[i]-o[i]);

if(Y[i]*o[i]<1)
{
if(diff>0)
{
deltas[p].delta=(1-Y[i]*o[i])/diff;
deltas[p].index=i;
deltas[p].s=-1;
p++;
}
}
else
{
if(diff<0)
{
deltas[p].delta=(1-Y[i]*o[i])/diff;
deltas[p].index=i;
deltas[p].s=1;
p++;
}
}
}
std::sort(deltas,deltas+p);
/* Walk the sorted breakpoints until the derivative becomes non-negative,
 * updating L and R for each example that crosses the margin. */
float64_t delta_prime=0.0;
for (int32_t i=0;i<p;i++)
{
delta_prime = L + deltas[i].delta*(R-L);
if(delta_prime>=0)
break;
ii=deltas[i].index;
diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]);
L+=diff*(o[ii]-Y[ii]);
R+=diff*(o_bar[ii]-Y[ii]);
}
SG_FREE(deltas);
/* Zero of the (linear) derivative on the final interval. */
return (-L/(R-L));
}
00436
00437 int32_t TSVM_MFN(
00438 const struct data *Data, struct options *Options,
00439 struct vector_double *Weights, struct vector_double *Outputs)
00440 {
00441
00442 struct data *Data_Labeled = SG_MALLOC(data, 1);
00443 struct vector_double *Outputs_Labeled = SG_MALLOC(vector_double, 1);
00444 initialize(Outputs_Labeled,Data->l,0.0);
00445 SG_SDEBUG("Initializing weights, unknown labels");
00446 GetLabeledData(Data_Labeled,Data);
00447 L2_SVM_MFN(Data_Labeled, Options, Weights,Outputs_Labeled,0);
00449
00450
00451 int32_t p=0,q=0;
00452 float64_t t=0.0;
00453 int32_t *JU = SG_MALLOC(int32_t, Data->u);
00454 float64_t *ou = SG_MALLOC(float64_t, Data->u);
00455 float64_t lambda_0 = TSVM_LAMBDA_SMALL;
00456 for (int32_t i=0;i<Data->m;i++)
00457 {
00458 if(Data->Y[i]==0.0)
00459 {
00460 t=Data->features->dense_dot(i, Weights->vec, Data->n-1);
00461 t+=Options->bias*Weights->vec[Data->n-1];
00462
00463 Outputs->vec[i]=t;
00464 Data->C[i]=lambda_0*1.0/Data->u;
00465 JU[q]=i;
00466 ou[q]=t;
00467 q++;
00468 }
00469 else
00470 {
00471 Outputs->vec[i]=Outputs_Labeled->vec[p];
00472 Data->C[i]=1.0/Data->l;
00473 p++;
00474 }
00475 }
00476 std::nth_element(ou,ou+int32_t((1-Options->R)*Data->u-1),ou+Data->u);
00477 float64_t thresh=*(ou+int32_t((1-Options->R)*Data->u)-1);
00478 SG_FREE(ou);
00479 for (int32_t i=0;i<Data->u;i++)
00480 {
00481 if(Outputs->vec[JU[i]]>thresh)
00482 Data->Y[JU[i]]=1.0;
00483 else
00484 Data->Y[JU[i]]=-1.0;
00485 }
00486 for (int32_t i=0;i<Data->n;i++)
00487 Weights->vec[i]=0.0;
00488 for (int32_t i=0;i<Data->m;i++)
00489 Outputs->vec[i]=0.0;
00490 L2_SVM_MFN(Data,Options,Weights,Outputs,0);
00491 int32_t num_switches=0;
00492 int32_t s=0;
00493 int32_t last_round=0;
00494 while(lambda_0 <= Options->lambda_u)
00495 {
00496 int32_t iter2=0;
00497 while(1){
00498 s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S);
00499 if(s==0) break;
00500 iter2++;
00501 SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2);
00502 SG_SDEBUG("Optimizing unknown labels. switched %d labels.\n");
00503 num_switches+=s;
00504 SG_SDEBUG("Optimizing weights\n");
00505 L2_SVM_MFN(Data,Options,Weights,Outputs,1);
00506 }
00507 if(last_round==1) break;
00508 lambda_0=TSVM_ANNEALING_RATE*lambda_0;
00509 if(lambda_0 >= Options->lambda_u) {lambda_0 = Options->lambda_u; last_round=1;}
00510 for (int32_t i=0;i<Data->u;i++)
00511 Data->C[JU[i]]=lambda_0*1.0/Data->u;
00512 SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u);
00513 SG_SDEBUG("Optimizing weights\n");
00514 L2_SVM_MFN(Data,Options,Weights,Outputs,1);
00515 }
00516 SG_SDEBUG("Total Number of Switches = %d\n", num_switches);
00517
00518 for (int32_t i=0;i<Data->u;i++) Data->Y[JU[i]] = 0.0;
00519 float64_t F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
00520 SG_SDEBUG("Objective Value = %f\n",F);
00521 delete [] JU;
00522 return num_switches;
00523 }
00524
00525 int32_t switch_labels(float64_t* Y, float64_t* o, int32_t* JU, int32_t u, int32_t S)
00526 {
00527 int32_t npos=0;
00528 int32_t nneg=0;
00529 for (int32_t i=0;i<u;i++)
00530 {
00531 if((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++;
00532 if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++;
00533 }
00534 Delta* positive=SG_MALLOC(Delta, npos);
00535 Delta* negative=SG_MALLOC(Delta, nneg);
00536 int32_t p=0;
00537 int32_t n=0;
00538 int32_t ii=0;
00539 for (int32_t i=0;i<u;i++)
00540 {
00541 ii=JU[i];
00542 if((Y[ii]>0.0) && (o[ii]<1.0)) {
00543 positive[p].delta=o[ii];
00544 positive[p].index=ii;
00545 positive[p].s=0;
00546 p++;};
00547 if((Y[ii]<0.0) && (-o[ii]<1.0))
00548 {
00549 negative[n].delta=-o[ii];
00550 negative[n].index=ii;
00551 negative[n].s=0;
00552 n++;};
00553 }
00554 std::sort(positive,positive+npos);
00555 std::sort(negative,negative+nneg);
00556 int32_t s=-1;
00557 while(1)
00558 {
00559 s++;
00560 if((s>=S) || (positive[s].delta>=-negative[s].delta) || (s>=npos) || (s>=nneg))
00561 break;
00562 Y[positive[s].index]=-1.0;
00563 Y[negative[s].index]= 1.0;
00564 }
00565 SG_FREE(positive);
00566 SG_FREE(negative);
00567 return s;
00568 }
00569
/* Deterministic Annealing Semi-Supervised SVM (DA-S3VM).
 * Alternates between optimizing the positive-class probabilities p of the
 * unlabeled examples at temperature T (optimize_p) and the weights given p
 * (optimize_w), annealing T down each outer iteration until the entropy of
 * p drops below Options->epsilon. The best (lowest transductive cost)
 * iterate seen is copied back into Weights/Outputs. Returns 1. */
int32_t DA_S3VM(
struct data *Data, struct options *Options, struct vector_double *Weights,
struct vector_double *Outputs)
{
float64_t T = DA_INIT_TEMP*Options->lambda_u;
int32_t iter1 = 0, iter2 =0;
/* p: current probabilities; q: previous p (for the KL stopping test);
 * g: per-example gradient of the unlabeled loss w.r.t. p. */
float64_t *p = SG_MALLOC(float64_t, Data->u);
float64_t *q = SG_MALLOC(float64_t, Data->u);
float64_t *g = SG_MALLOC(float64_t, Data->u);
float64_t F,F_min;
/* Best-so-far weights and outputs. */
float64_t *w_min = SG_MALLOC(float64_t, Data->n);
float64_t *o_min = SG_MALLOC(float64_t, Data->m);
float64_t *w = Weights->vec;
float64_t *o = Outputs->vec;
float64_t kl_divergence = 1.0;

SG_SDEBUG("Initializing weights, p");
for (int32_t i=0;i<Data->u; i++)
p[i] = Options->R;

/* JU: indices of the unlabeled examples (those with Y==0). */
int32_t *JU = SG_MALLOC(int32_t, Data->u);
int32_t j=0;
for(int32_t i=0;i<Data->m;i++)
{
if(Data->Y[i]==0.0)
{JU[j]=i;j++;}
}
float64_t H = entropy(p,Data->u);
optimize_w(Data,p,Options,Weights,Outputs,0);
F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
F_min = F;
for (int32_t i=0;i<Weights->d;i++)
w_min[i]=w[i];
for (int32_t i=0;i<Outputs->d;i++)
o_min[i]=o[i];
/* Outer annealing loop: stop when p has (almost) hardened. */
while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon))
{
iter1++;
iter2=0;
kl_divergence=1.0;
/* Inner fixed-point loop at temperature T: update p, then w, until p
 * stops moving (small KL divergence between successive p's). */
while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon))
{
iter2++;
for (int32_t i=0;i<Data->u;i++)
{
q[i]=p[i];
/* Difference of the clipped quadratic losses for labeling example
 * JU[i] positive vs. negative, scaled by lambda_u. */
g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]]< -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]])));
}
SG_SDEBUG("Optimizing p.\n");
optimize_p(g,Data->u,T,Options->R,p);
kl_divergence=KL(p,q,Data->u);
SG_SDEBUG("Optimizing weights\n");
optimize_w(Data,p,Options,Weights,Outputs,1);
F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
/* Track the best iterate across all temperatures. */
if(F < F_min)
{
F_min = F;
for (int32_t i=0;i<Weights->d;i++)
w_min[i]=w[i];
for (int32_t i=0;i<Outputs->d;i++)
o_min[i]=o[i];
}
SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F);
}
H = entropy(p,Data->u);
SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H);
T = T/DA_ANNEALING_RATE;
}
/* Return the best iterate found, not necessarily the last one. */
for (int32_t i=0;i<Weights->d;i++)
w[i]=w_min[i];
for (int32_t i=0;i<Outputs->d;i++)
o[i]=o_min[i];

SG_FREE(p);
SG_FREE(q);
SG_FREE(g);
SG_FREE(JU);
SG_FREE(w_min);
SG_FREE(o_min);
SG_SINFO("(min) Objective Value = %f", F_min);
return 1;
}
00652
/* L2-SVM-MFN adapted for DA-S3VM: each unlabeled example i is represented
 * twice — once with label +1 (slot i, cost lambda_u_by_u*p[j]) and once
 * with label -1 (slot m+j, cost lambda_u_by_u*(1-p[j])), where p[j] is the
 * current probability that example i is positive. Labeled examples keep
 * their labels with cost 1/l. The outer loop mirrors L2_SVM_MFN: CGLS on
 * the active set, exact line search, active-set rebuild.
 * Data->Y and Data->C are used as scratch for effective labels/costs; the
 * Y entries of unlabeled examples are reset to 0.0 before returning.
 * Returns 1 on optimality, 0 if MFNITERMAX was exceeded.
 * NOTE(review): Outputs_bar->d is set to m although o_bar holds m+u
 * entries; this appears benign because consumers index via subsets, but
 * confirm before relying on Outputs_bar->d. */
int32_t optimize_w(
const struct data *Data, const float64_t *p, struct options *Options,
struct vector_double *Weights, struct vector_double *Outputs, int32_t ini)
{
int32_t i,j;
CDotFeatures* features=Data->features;
int32_t n = Data->n;
int32_t m = Data->m;
int32_t u = Data->u;
float64_t lambda = Options->lambda;
float64_t epsilon;
float64_t *w = Weights->vec;
/* Local o/Y/C cover the m real examples plus the u mirrored copies. */
float64_t *o = SG_MALLOC(float64_t, m+u);
float64_t *Y = SG_MALLOC(float64_t, m+u);
float64_t *C = SG_MALLOC(float64_t, m+u);
int32_t *labeled_indices = SG_MALLOC(int32_t, m);
float64_t F_old = 0.0;
float64_t F = 0.0;
float64_t diff=0.0;
float64_t lambda_u_by_u = Options->lambda_u/u;
/* Active (margin-violating) examples in [0,active), rest from the back. */
vector_int *ActiveSubset = SG_MALLOC(vector_int, 1);
ActiveSubset->vec = SG_MALLOC(int32_t, m);
ActiveSubset->d = m;

/* Cold start: loose tolerance, few CG iterations (speedup heuristics). */
if(ini==0)
{
epsilon=BIG_EPSILON;
Options->cgitermax=SMALL_CGITERMAX;
Options->epsilon=BIG_EPSILON;}
else {epsilon = Options->epsilon;}

for(i=0;i<n;i++) F+=w[i]*w[i];
F=lambda*F;
int32_t active=0;
int32_t inactive=m-1;
float64_t temp1;
float64_t temp2;

/* Build effective labels/costs, the mirrored copies, the initial active
 * set and the initial objective (F is halved after the loop). */
j = 0;
for(i=0; i<m ; i++)
{
o[i]=Outputs->vec[i];
if(Data->Y[i]==0.0)
{
/* Unlabeled example: j-th mirror at slot m+j with opposite label. */
labeled_indices[i]=0;
o[m+j]=o[i];
Y[i]=1;
Y[m+j]=-1;
C[i]=lambda_u_by_u*p[j];
C[m+j]=lambda_u_by_u*(1-p[j]);
ActiveSubset->vec[active]=i;
active++;
diff = 1 - CMath::abs(o[i]);
if(diff>0)
{
/* Inside the margin band: soft label 2p-1 and combined cost. */
Data->Y[i] = 2*p[j]-1;
Data->C[i] = lambda_u_by_u;
temp1 = (1 - o[i]);
temp2 = (1 + o[i]);
F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
}
else
{
/* Outside the band: only the violated side contributes. */
if(o[i]>0)
{
Data->Y[i] = -1.0;
Data->C[i] = C[m+j];
}
else
{
Data->Y[i] = 1.0;
Data->C[i] = C[i];
}
temp1 = (1-Data->Y[i]*o[i]);
F+= Data->C[i]*temp1*temp1;
}
j++;
}
else
{
/* Labeled example: fixed label, cost 1/l. */
labeled_indices[i]=1;
Y[i]=Data->Y[i];
C[i]=1.0/Data->l;
Data->C[i]=1.0/Data->l;
diff=1-Data->Y[i]*o[i];
if(diff>0)
{
ActiveSubset->vec[active]=i;
active++;
F+=Data->C[i]*diff*diff;
}
else
{
ActiveSubset->vec[inactive]=i;
inactive--;
}
}
}
F=0.5*F;
ActiveSubset->d=active;
int32_t iter=0;
int32_t opt=0;
int32_t opt2=0;
vector_double *Weights_bar = SG_MALLOC(vector_double, 1);
vector_double *Outputs_bar = SG_MALLOC(vector_double, 1);
float64_t *w_bar = SG_MALLOC(float64_t, n);
float64_t *o_bar = SG_MALLOC(float64_t, m+u);
Weights_bar->vec=w_bar;
Outputs_bar->vec=o_bar;
Weights_bar->d=n;
Outputs_bar->d=m;
float64_t delta=0.0;
float64_t t=0.0;
int32_t ii = 0;
/* Outer MFN loop: CGLS on the active set + exact line search. */
while(iter<MFNITERMAX)
{
iter++;
SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F);
for(i=n; i-- ;)
w_bar[i]=w[i];

for(i=m+u; i-- ;)
o_bar[i]=o[i];
opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
/* Recompute predictions of the inactive examples for w_bar. */
for(i=active; i < m; i++)
{
ii=ActiveSubset->vec[i];
t=features->dense_dot(ii, w_bar, n-1);
t+=Options->bias*w_bar[n-1];

o_bar[ii]=t;
}

/* Keep the mirrored copies in sync with their originals. */
j=0;
for(i=0; i<m;i++)
{
if(labeled_indices[i]==0)
{o_bar[m+j]=o_bar[i]; j++;};
}
if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
/* Optimality check; unlabeled examples are tested on |o_bar| because
 * both mirrored labels apply. */
opt2=1;
for(i=0; i < m ;i++)
{
ii=ActiveSubset->vec[i];
if(i<active)
{
if(labeled_indices[ii]==1)
opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+epsilon));
else
{
if(CMath::abs(o[ii])<1)
opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon));
else
opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon));
}
}
else
opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon));
if(opt2==0) break;
}
if(opt && opt2)
{
if(epsilon==BIG_EPSILON)
{
/* Converged under the loose tolerance; tighten and continue. */
epsilon=EPSILON;
Options->epsilon=EPSILON;
SG_SDEBUG( "epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON);
continue;
}
else
{
/* Optimal: publish w_bar/o_bar, restore Y=0 markers, clean up. */
for(i=n; i-- ;)
w[i]=w_bar[i];
for(i=m; i-- ;)
Outputs->vec[i]=o_bar[i];
for(i=m; i-- ;)
{
if(labeled_indices[i]==0)
Data->Y[i]=0.0;
}
SG_FREE(ActiveSubset->vec);
SG_FREE(ActiveSubset);
SG_FREE(o_bar);
SG_FREE(w_bar);
SG_FREE(o);
SG_FREE(Weights_bar);
SG_FREE(Outputs_bar);
SG_FREE(Y);
SG_FREE(C);
SG_FREE(labeled_indices);
SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter);
return 1;
}
}

/* Line search over all m+u (real + mirrored) examples. */
delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u);
SG_SDEBUG("LINE_SEARCH delta = %f", delta);
F_old=F;
F=0.0;
for(i=0;i<n;i++) {w[i]+=delta*(w_bar[i]-w[i]); F+=w[i]*w[i];}
F=lambda*F;
j=0;
active=0;
inactive=m-1;
/* Rebuild effective labels/costs, active set and objective at the new
 * iterate (same scheme as the initialization loop above). */
for(i=0; i<m ; i++)
{
o[i]+=delta*(o_bar[i]-o[i]);
if(labeled_indices[i]==0)
{
o[m+j]=o[i];
ActiveSubset->vec[active]=i;
active++;
diff = 1 - CMath::abs(o[i]);
if(diff>0)
{
Data->Y[i] = 2*p[j]-1;
Data->C[i] = lambda_u_by_u;
temp1 = (1 - o[i]);
temp2 = (1 + o[i]);
F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
}
else
{
if(o[i]>0)
{
Data->Y[i] = -1;
Data->C[i] = C[m+j];
}
else
{
Data->Y[i] = +1;
Data->C[i] = C[i];
}
temp1=(1-Data->Y[i]*o[i]);
F+= Data->C[i]*temp1*temp1;
}
j++;
}
else
{
diff=1-Data->Y[i]*o[i];
if(diff>0)
{
ActiveSubset->vec[active]=i;
active++;
F+=Data->C[i]*diff*diff;
}
else
{
ActiveSubset->vec[inactive]=i;
inactive--;
}
}
}
F=0.5*F;
ActiveSubset->d=active;
/* Absolute objective-change stopping test. */
if(CMath::abs(F-F_old)<EPSILON)
break;
}
/* Publish outputs and restore the Y=0 markers of unlabeled examples. */
for(i=m; i-- ;)
{
Outputs->vec[i]=o[i];
if(labeled_indices[i]==0)
Data->Y[i]=0.0;
}
SG_FREE(ActiveSubset->vec);
SG_FREE(labeled_indices);
SG_FREE(ActiveSubset);
SG_FREE(o_bar);
SG_FREE(w_bar);
SG_FREE(o);
SG_FREE(Weights_bar);
SG_FREE(Outputs_bar);
SG_FREE(Y);
SG_FREE(C);
SG_SINFO("L2_SVM_MFN converged in %d iterations", iter);
return 0;
}
00931
00932 void optimize_p(
00933 const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t* p)
00934 {
00935 int32_t iter=0;
00936 float64_t epsilon=1e-10;
00937 int32_t maxiter=500;
00938 float64_t nu_minus=g[0];
00939 float64_t nu_plus=g[0];
00940 for (int32_t i=0;i<u;i++)
00941 {
00942 if(g[i]<nu_minus) nu_minus=g[i];
00943 if(g[i]>nu_plus) nu_plus=g[i];
00944 };
00945
00946 float64_t b=T*log((1-r)/r);
00947 nu_minus-=b;
00948 nu_plus-=b;
00949 float64_t nu=(nu_plus+nu_minus)/2;
00950 float64_t Bnu=0.0;
00951 float64_t BnuPrime=0.0;
00952 float64_t s=0.0;
00953 float64_t tmp=0.0;
00954 for (int32_t i=0;i<u;i++)
00955 {
00956 s=exp((g[i]-nu)/T);
00957 if(!(CMath::is_infinity(s)))
00958 {
00959 tmp=1.0/(1.0+s);
00960 Bnu+=tmp;
00961 BnuPrime+=s*tmp*tmp;
00962 }
00963 }
00964 Bnu=Bnu/u;
00965 Bnu-=r;
00966 BnuPrime=BnuPrime/(T*u);
00967 float64_t nuHat=0.0;
00968 while((CMath::abs(Bnu)>epsilon) && (iter < maxiter))
00969 {
00970 iter++;
00971 if(CMath::abs(BnuPrime)>0.0)
00972 nuHat=nu-Bnu/BnuPrime;
00973 if((CMath::abs(BnuPrime) > 0.0) | (nuHat>nu_plus) | (nuHat < nu_minus))
00974 nu=(nu_minus+nu_plus)/2.0;
00975 else
00976 nu=nuHat;
00977 Bnu=0.0;
00978 BnuPrime=0.0;
00979 for(int32_t i=0;i<u;i++)
00980 {
00981 s=exp((g[i]-nu)/T);
00982 if(!(CMath::is_infinity(s)))
00983 {
00984 tmp=1.0/(1.0+s);
00985 Bnu+=tmp;
00986 BnuPrime+=s*tmp*tmp;
00987 }
00988 }
00989 Bnu=Bnu/u;
00990 Bnu-=r;
00991 BnuPrime=BnuPrime/(T*u);
00992 if(Bnu<0)
00993 nu_minus=nu;
00994 else
00995 nu_plus=nu;
00996 if(CMath::abs(nu_minus-nu_plus)<epsilon)
00997 break;
00998 }
00999 if(CMath::abs(Bnu)>epsilon)
01000 SG_SWARNING("Warning (Root): root not found to required precision\n");
01001
01002 for (int32_t i=0;i<u;i++)
01003 {
01004 s=exp((g[i]-nu)/T);
01005 if(CMath::is_infinity(s)) p[i]=0.0;
01006 else p[i]=1.0/(1.0+s);
01007 }
01008 SG_SINFO(" root (nu) = %f B(nu) = %f", nu, Bnu);
01009 }
01010
01011 float64_t transductive_cost(
01012 float64_t normWeights, float64_t *Y, float64_t *Outputs, int32_t m,
01013 float64_t lambda, float64_t lambda_u)
01014 {
01015 float64_t F1=0.0,F2=0.0, o=0.0, y=0.0;
01016 int32_t u=0,l=0;
01017 for (int32_t i=0;i<m;i++)
01018 {
01019 o=Outputs[i];
01020 y=Y[i];
01021 if(y==0.0)
01022 {F1 += CMath::abs(o) > 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o)); u++;}
01023 else
01024 {F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o); l++;}
01025 }
01026 float64_t F;
01027 F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l);
01028 return F;
01029 }
01030
01031 float64_t entropy(const float64_t *p, int32_t u)
01032 {
01033 float64_t h=0.0;
01034 float64_t q=0.0;
01035 for (int32_t i=0;i<u;i++)
01036 {
01037 q=p[i];
01038 if(q>0 && q<1)
01039 h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q));
01040 }
01041 return h/u;
01042 }
01043
/* Mean per-point binary KL divergence KL(p_i || q_i), in bits.
 * NOTE(review): the clamping below looks suspicious — `p1<1-1e-8` fires
 * for every mid-range value (perturbing nearly all probabilities by
 * +1e-8), where the apparent intent is `p1<1e-8` to keep values away
 * from 0 only. Kept as-is because it still prevents log(0) and matches
 * this file's behavior; confirm against the upstream SVMlin sources. */
float64_t KL(const float64_t *p, const float64_t *q, int32_t u)
{
float64_t h=0.0;
float64_t p1=0.0;
float64_t q1=0.0;
float64_t g=0.0;
for (int32_t i=0;i<u;i++)
{
p1=p[i];
q1=q[i];
/* Keep log2 arguments strictly inside (0,1). */
if(p1>1-1e-8) p1-=1e-8;
if(p1<1-1e-8) p1+=1e-8;
if(q1>1-1e-8) q1-=1e-8;
if(q1<1-1e-8) q1+=1e-8;
g= (p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1)));
/* Flush tiny or NaN contributions to zero. */
if(CMath::abs(g)<1e-12 || CMath::is_nan(g)) g=0.0;
h+=g;
}
return h/u;
}
01064
01065
01066 float64_t norm_square(const vector_double *A)
01067 {
01068 float64_t x=0.0, t=0.0;
01069 for(int32_t i=0;i<A->d;i++)
01070 {
01071 t=A->vec[i];
01072 x+=t*t;
01073 }
01074 return x;
01075 }
01076
01077 void initialize(struct vector_double *A, int32_t k, float64_t a)
01078 {
01079 float64_t *vec = SG_MALLOC(float64_t, k);
01080 for (int32_t i=0;i<k;i++)
01081 vec[i]=a;
01082 A->vec = vec;
01083 A->d = k;
01084 return;
01085 }
01086
01087 void initialize(struct vector_int *A, int32_t k)
01088 {
01089 int32_t *vec = SG_MALLOC(int32_t, k);
01090 for(int32_t i=0;i<k;i++)
01091 vec[i]=i;
01092 A->vec = vec;
01093 A->d = k;
01094 return;
01095 }
01096
/* NOTE(review): the body of this function is empty in this chunk — it is
 * presumably expected to copy the labeled subset of Data into D (TSVM_MFN
 * calls it before training on labeled points only), but as shown here it
 * leaves D uninitialized. Confirm whether the implementation was stripped
 * from this view or is genuinely missing. */
void GetLabeledData(struct data *D, const struct data *Data)
{
}
01139 }