00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include <stdio.h>
00022 #include <stdlib.h>
00023 #include <ctype.h>
00024 #include <algorithm>
00025
00026 #include "lib/io.h"
00027 #include "lib/Mathematics.h"
00028 #include "features/SparseFeatures.h"
00029 #include "classifier/svm/ssl.h"
00030
00031 namespace shogun
00032 {
00033 void ssl_train(struct data *Data,
00034 struct options *Options,
00035 struct vector_double *Weights,
00036 struct vector_double *Outputs)
00037 {
00038
00039 initialize(Weights,Data->n,0.0);
00040 initialize(Outputs,Data->m,0.0);
00041 vector_int *Subset = new vector_int[1];
00042 initialize(Subset,Data->m);
00043
00044 int32_t optimality = 0;
00045 switch(Options->algo)
00046 {
00047 case -1:
00048 SG_SINFO("Regularized Least Squares Regression (CGLS)\n");
00049 optimality=CGLS(Data,Options,Subset,Weights,Outputs);
00050 break;
00051 case RLS:
00052 SG_SINFO("Regularized Least Squares Classification (CGLS)\n");
00053 optimality=CGLS(Data,Options,Subset,Weights,Outputs);
00054 break;
00055 case SVM:
00056 SG_SINFO("Modified Finite Newton L2-SVM (L2-SVM-MFN)\n");
00057 optimality=L2_SVM_MFN(Data,Options,Weights,Outputs,0);
00058 break;
00059 case TSVM:
00060 SG_SINFO("Transductive L2-SVM (TSVM)\n");
00061 optimality=TSVM_MFN(Data,Options,Weights,Outputs);
00062 break;
00063 case DA_SVM:
00064 SG_SINFO("Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n");
00065 optimality=DA_S3VM(Data,Options,Weights,Outputs);
00066 break;
00067 default:
00068 SG_SERROR("Algorithm unspecified");
00069 }
00070
00071 delete[] Subset->vec;
00072 delete[] Subset;
00073 return;
00074 }
00075
00076 int32_t CGLS(
00077 const struct data *Data, const struct options *Options,
00078 const struct vector_int *Subset, struct vector_double *Weights,
00079 struct vector_double *Outputs)
00080 {
00081 SG_SDEBUG("CGLS starting...");
00082
00083
00084 int32_t active = Subset->d;
00085 int32_t *J = Subset->vec;
00086 CDotFeatures* features=Data->features;
00087 float64_t *Y = Data->Y;
00088 float64_t *C = Data->C;
00089 int32_t n = Data->n;
00090 float64_t lambda = Options->lambda;
00091 int32_t cgitermax = Options->cgitermax;
00092 float64_t epsilon = Options->epsilon;
00093 float64_t *beta = Weights->vec;
00094 float64_t *o = Outputs->vec;
00095
00096 float64_t *z = new float64_t[active];
00097 float64_t *q = new float64_t[active];
00098 int32_t ii=0;
00099 for (int32_t i = active ; i-- ;){
00100 ii=J[i];
00101 z[i] = C[ii]*(Y[ii] - o[ii]);
00102 }
00103 float64_t *r = new float64_t[n];
00104 for (int32_t i = n ; i-- ;)
00105 r[i] = 0.0;
00106 for (register int32_t j=0; j < active; j++)
00107 {
00108 features->add_to_dense_vec(z[j], J[j], r, n-1);
00109 r[n-1]+=Options->bias*z[j];
00110 }
00111 float64_t *p = new float64_t[n];
00112 float64_t omega1 = 0.0;
00113 for (int32_t i = n ; i-- ;)
00114 {
00115 r[i] -= lambda*beta[i];
00116 p[i] = r[i];
00117 omega1 += r[i]*r[i];
00118 }
00119 float64_t omega_p = omega1;
00120 float64_t omega_q = 0.0;
00121 float64_t inv_omega2 = 1/omega1;
00122 float64_t scale = 0.0;
00123 float64_t omega_z=0.0;
00124 float64_t gamma = 0.0;
00125 int32_t cgiter = 0;
00126 int32_t optimality = 0;
00127 float64_t epsilon2 = epsilon*epsilon;
00128
00129 while(cgiter < cgitermax)
00130 {
00131 cgiter++;
00132 omega_q=0.0;
00133 float64_t t=0.0;
00134 register int32_t i,j;
00135
00136 for (i=0; i < active; i++)
00137 {
00138 ii=J[i];
00139 t=features->dense_dot(ii, p, n-1);
00140 t+=Options->bias*p[n-1];
00141 q[i]=t;
00142 omega_q += C[ii]*t*t;
00143 }
00144 gamma = omega1/(lambda*omega_p + omega_q);
00145 inv_omega2 = 1/omega1;
00146 for (i = n ; i-- ;)
00147 {
00148 r[i] = 0.0;
00149 beta[i] += gamma*p[i];
00150 }
00151 omega_z=0.0;
00152 for (i = active ; i-- ;)
00153 {
00154 ii=J[i];
00155 o[ii] += gamma*q[i];
00156 z[i] -= gamma*C[ii]*q[i];
00157 omega_z+=z[i]*z[i];
00158 }
00159 for (j=0; j < active; j++)
00160 {
00161 t=z[j];
00162
00163 features->add_to_dense_vec(t, J[j], r, n-1);
00164 r[n-1]+=Options->bias*t;
00165 }
00166 omega1 = 0.0;
00167 for (i = n ; i-- ;)
00168 {
00169 r[i] -= lambda*beta[i];
00170 omega1 += r[i]*r[i];
00171 }
00172 if(omega1 < epsilon2*omega_z)
00173 {
00174 optimality=1;
00175 break;
00176 }
00177 omega_p=0.0;
00178 scale=omega1*inv_omega2;
00179 for(i = n ; i-- ;)
00180 {
00181 p[i] = r[i] + p[i]*scale;
00182 omega_p += p[i]*p[i];
00183 }
00184 }
00185 SG_SDEBUG("...Done.");
00186 SG_SINFO("CGLS converged in %d iteration(s)", cgiter);
00187
00188 delete[] z;
00189 delete[] q;
00190 delete[] r;
00191 delete[] p;
00192 return optimality;
00193 }
00194
00195 int32_t L2_SVM_MFN(
00196 const struct data *Data, struct options *Options,
00197 struct vector_double *Weights, struct vector_double *Outputs,
00198 int32_t ini)
00199 {
00200
00201 CDotFeatures* features=Data->features;
00202 float64_t *Y = Data->Y;
00203 float64_t *C = Data->C;
00204 int32_t n = Data->n;
00205 int32_t m = Data->m;
00206 float64_t lambda = Options->lambda;
00207 float64_t epsilon;
00208 float64_t *w = Weights->vec;
00209 float64_t *o = Outputs->vec;
00210 float64_t F_old = 0.0;
00211 float64_t F = 0.0;
00212 float64_t diff=0.0;
00213 vector_int *ActiveSubset = new vector_int[1];
00214 ActiveSubset->vec = new int32_t[m];
00215 ActiveSubset->d = m;
00216
00217 if(ini==0) {
00218 epsilon=BIG_EPSILON;
00219 Options->cgitermax=SMALL_CGITERMAX;
00220 Options->epsilon=BIG_EPSILON;
00221 }
00222 else {epsilon = Options->epsilon;}
00223 for (int32_t i=0;i<n;i++) F+=w[i]*w[i];
00224 F=0.5*lambda*F;
00225 int32_t active=0;
00226 int32_t inactive=m-1;
00227 for (int32_t i=0; i<m ; i++)
00228 {
00229 diff=1-Y[i]*o[i];
00230 if(diff>0)
00231 {
00232 ActiveSubset->vec[active]=i;
00233 active++;
00234 F+=0.5*C[i]*diff*diff;
00235 }
00236 else
00237 {
00238 ActiveSubset->vec[inactive]=i;
00239 inactive--;
00240 }
00241 }
00242 ActiveSubset->d=active;
00243 int32_t iter=0;
00244 int32_t opt=0;
00245 int32_t opt2=0;
00246 vector_double *Weights_bar = new vector_double[1];
00247 vector_double *Outputs_bar = new vector_double[1];
00248 float64_t *w_bar = new float64_t[n];
00249 float64_t *o_bar = new float64_t[m];
00250 Weights_bar->vec=w_bar;
00251 Outputs_bar->vec=o_bar;
00252 Weights_bar->d=n;
00253 Outputs_bar->d=m;
00254 float64_t delta=0.0;
00255 float64_t t=0.0;
00256 int32_t ii = 0;
00257 while(iter<MFNITERMAX)
00258 {
00259 iter++;
00260 SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F);
00261 for (int32_t i=n; i-- ;)
00262 w_bar[i]=w[i];
00263 for (int32_t i=m; i-- ;)
00264 o_bar[i]=o[i];
00265
00266 opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
00267 for(register int32_t i=active; i < m; i++)
00268 {
00269 ii=ActiveSubset->vec[i];
00270
00271 t=features->dense_dot(ii, w_bar, n-1);
00272 t+=Options->bias*w_bar[n-1];
00273
00274 o_bar[ii]=t;
00275 }
00276 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
00277 opt2=1;
00278 for (int32_t i=0;i<m;i++)
00279 {
00280 ii=ActiveSubset->vec[i];
00281 if(i<active)
00282 opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+epsilon));
00283 else
00284 opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-epsilon));
00285 if(opt2==0) break;
00286 }
00287 if(opt && opt2)
00288 {
00289 if(epsilon==BIG_EPSILON)
00290 {
00291 epsilon=EPSILON;
00292 Options->epsilon=EPSILON;
00293 SG_SDEBUG("epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON , EPSILON);
00294 continue;
00295 }
00296 else
00297 {
00298 for (int32_t i=n; i-- ;)
00299 w[i]=w_bar[i];
00300 for (int32_t i=m; i-- ;)
00301 o[i]=o_bar[i];
00302 delete[] ActiveSubset->vec;
00303 delete[] ActiveSubset;
00304 delete[] o_bar;
00305 delete[] w_bar;
00306 delete[] Weights_bar;
00307 delete[] Outputs_bar;
00308 SG_SINFO("L2_SVM_MFN converged (optimality) in %d", iter);
00309 return 1;
00310 }
00311 }
00312 delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m);
00313 SG_SDEBUG("LINE_SEARCH delta = %f\n", delta);
00314 F_old=F;
00315 F=0.0;
00316 for (int32_t i=n; i-- ;) {
00317 w[i]+=delta*(w_bar[i]-w[i]);
00318 F+=w[i]*w[i];
00319 }
00320 F=0.5*lambda*F;
00321 active=0;
00322 inactive=m-1;
00323 for (int32_t i=0; i<m ; i++)
00324 {
00325 o[i]+=delta*(o_bar[i]-o[i]);
00326 diff=1-Y[i]*o[i];
00327 if(diff>0)
00328 {
00329 ActiveSubset->vec[active]=i;
00330 active++;
00331 F+=0.5*C[i]*diff*diff;
00332 }
00333 else
00334 {
00335 ActiveSubset->vec[inactive]=i;
00336 inactive--;
00337 }
00338 }
00339 ActiveSubset->d=active;
00340 if(CMath::abs(F-F_old)<RELATIVE_STOP_EPS*CMath::abs(F_old))
00341 {
00342 SG_SINFO("L2_SVM_MFN converged (rel. criterion) in %d iterations", iter);
00343 return 2;
00344 }
00345 }
00346 delete[] ActiveSubset->vec;
00347 delete[] ActiveSubset;
00348 delete[] o_bar;
00349 delete[] w_bar;
00350 delete[] Weights_bar;
00351 delete[] Outputs_bar;
00352 SG_SINFO("L2_SVM_MFN converged (max iter exceeded) in %d iterations", iter);
00353 return 0;
00354 }
00355
00356 float64_t line_search(float64_t *w,
00357 float64_t *w_bar,
00358 float64_t lambda,
00359 float64_t *o,
00360 float64_t *o_bar,
00361 float64_t *Y,
00362 float64_t *C,
00363 int32_t d,
00364 int32_t l)
00365 {
00366 float64_t omegaL = 0.0;
00367 float64_t omegaR = 0.0;
00368 float64_t diff=0.0;
00369 for(int32_t i=d; i--; )
00370 {
00371 diff=w_bar[i]-w[i];
00372 omegaL+=w[i]*diff;
00373 omegaR+=w_bar[i]*diff;
00374 }
00375 omegaL=lambda*omegaL;
00376 omegaR=lambda*omegaR;
00377 float64_t L=0.0;
00378 float64_t R=0.0;
00379 int32_t ii=0;
00380 for (int32_t i=0;i<l;i++)
00381 {
00382 if(Y[i]*o[i]<1)
00383 {
00384 diff=C[i]*(o_bar[i]-o[i]);
00385 L+=(o[i]-Y[i])*diff;
00386 R+=(o_bar[i]-Y[i])*diff;
00387 }
00388 }
00389 L+=omegaL;
00390 R+=omegaR;
00391 Delta* deltas=new Delta[l];
00392 int32_t p=0;
00393 for(int32_t i=0;i<l;i++)
00394 {
00395 diff=Y[i]*(o_bar[i]-o[i]);
00396
00397 if(Y[i]*o[i]<1)
00398 {
00399 if(diff>0)
00400 {
00401 deltas[p].delta=(1-Y[i]*o[i])/diff;
00402 deltas[p].index=i;
00403 deltas[p].s=-1;
00404 p++;
00405 }
00406 }
00407 else
00408 {
00409 if(diff<0)
00410 {
00411 deltas[p].delta=(1-Y[i]*o[i])/diff;
00412 deltas[p].index=i;
00413 deltas[p].s=1;
00414 p++;
00415 }
00416 }
00417 }
00418 std::sort(deltas,deltas+p);
00419 float64_t delta_prime=0.0;
00420 for (int32_t i=0;i<p;i++)
00421 {
00422 delta_prime = L + deltas[i].delta*(R-L);
00423 if(delta_prime>=0)
00424 break;
00425 ii=deltas[i].index;
00426 diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]);
00427 L+=diff*(o[ii]-Y[ii]);
00428 R+=diff*(o_bar[ii]-Y[ii]);
00429 }
00430 delete [] deltas;
00431 return (-L/(R-L));
00432 }
00433
00434 int32_t TSVM_MFN(
00435 const struct data *Data, struct options *Options,
00436 struct vector_double *Weights, struct vector_double *Outputs)
00437 {
00438
00439 struct data *Data_Labeled = new data[1];
00440 struct vector_double *Outputs_Labeled = new vector_double[1];
00441 initialize(Outputs_Labeled,Data->l,0.0);
00442 SG_SDEBUG("Initializing weights, unknown labels");
00443 GetLabeledData(Data_Labeled,Data);
00444 L2_SVM_MFN(Data_Labeled, Options, Weights,Outputs_Labeled,0);
00446
00447
00448 int32_t p=0,q=0;
00449 float64_t t=0.0;
00450 int32_t *JU = new int32_t[Data->u];
00451 float64_t *ou = new float64_t[Data->u];
00452 float64_t lambda_0 = TSVM_LAMBDA_SMALL;
00453 for (int32_t i=0;i<Data->m;i++)
00454 {
00455 if(Data->Y[i]==0.0)
00456 {
00457 t=Data->features->dense_dot(i, Weights->vec, Data->n-1);
00458 t+=Options->bias*Weights->vec[Data->n-1];
00459
00460 Outputs->vec[i]=t;
00461 Data->C[i]=lambda_0*1.0/Data->u;
00462 JU[q]=i;
00463 ou[q]=t;
00464 q++;
00465 }
00466 else
00467 {
00468 Outputs->vec[i]=Outputs_Labeled->vec[p];
00469 Data->C[i]=1.0/Data->l;
00470 p++;
00471 }
00472 }
00473 std::nth_element(ou,ou+int32_t((1-Options->R)*Data->u-1),ou+Data->u);
00474 float64_t thresh=*(ou+int32_t((1-Options->R)*Data->u)-1);
00475 delete [] ou;
00476 for (int32_t i=0;i<Data->u;i++)
00477 {
00478 if(Outputs->vec[JU[i]]>thresh)
00479 Data->Y[JU[i]]=1.0;
00480 else
00481 Data->Y[JU[i]]=-1.0;
00482 }
00483 for (int32_t i=0;i<Data->n;i++)
00484 Weights->vec[i]=0.0;
00485 for (int32_t i=0;i<Data->m;i++)
00486 Outputs->vec[i]=0.0;
00487 L2_SVM_MFN(Data,Options,Weights,Outputs,0);
00488 int32_t num_switches=0;
00489 int32_t s=0;
00490 int32_t last_round=0;
00491 while(lambda_0 <= Options->lambda_u)
00492 {
00493 int32_t iter2=0;
00494 while(1){
00495 s=switch_labels(Data->Y,Outputs->vec,JU,Data->u,Options->S);
00496 if(s==0) break;
00497 iter2++;
00498 SG_SDEBUG("****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2);
00499 SG_SDEBUG("Optimizing unknown labels. switched %d labels.\n");
00500 num_switches+=s;
00501 SG_SDEBUG("Optimizing weights\n");
00502 L2_SVM_MFN(Data,Options,Weights,Outputs,1);
00503 }
00504 if(last_round==1) break;
00505 lambda_0=TSVM_ANNEALING_RATE*lambda_0;
00506 if(lambda_0 >= Options->lambda_u) {lambda_0 = Options->lambda_u; last_round=1;}
00507 for (int32_t i=0;i<Data->u;i++)
00508 Data->C[JU[i]]=lambda_0*1.0/Data->u;
00509 SG_SDEBUG("****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u);
00510 SG_SDEBUG("Optimizing weights\n");
00511 L2_SVM_MFN(Data,Options,Weights,Outputs,1);
00512 }
00513 SG_SDEBUG("Total Number of Switches = %d\n", num_switches);
00514
00515 for (int32_t i=0;i<Data->u;i++) Data->Y[JU[i]] = 0.0;
00516 float64_t F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
00517 SG_SDEBUG("Objective Value = %f\n",F);
00518 delete [] JU;
00519 return num_switches;
00520 }
00521
00522 int32_t switch_labels(float64_t* Y, float64_t* o, int32_t* JU, int32_t u, int32_t S)
00523 {
00524 int32_t npos=0;
00525 int32_t nneg=0;
00526 for (int32_t i=0;i<u;i++)
00527 {
00528 if((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++;
00529 if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++;
00530 }
00531 Delta* positive=new Delta[npos];
00532 Delta* negative=new Delta[nneg];
00533 int32_t p=0;
00534 int32_t n=0;
00535 int32_t ii=0;
00536 for (int32_t i=0;i<u;i++)
00537 {
00538 ii=JU[i];
00539 if((Y[ii]>0.0) && (o[ii]<1.0)) {
00540 positive[p].delta=o[ii];
00541 positive[p].index=ii;
00542 positive[p].s=0;
00543 p++;};
00544 if((Y[ii]<0.0) && (-o[ii]<1.0))
00545 {
00546 negative[n].delta=-o[ii];
00547 negative[n].index=ii;
00548 negative[n].s=0;
00549 n++;};
00550 }
00551 std::sort(positive,positive+npos);
00552 std::sort(negative,negative+nneg);
00553 int32_t s=-1;
00554 while(1)
00555 {
00556 s++;
00557 if((s>=S) || (positive[s].delta>=-negative[s].delta) || (s>=npos) || (s>=nneg))
00558 break;
00559 Y[positive[s].index]=-1.0;
00560 Y[negative[s].index]= 1.0;
00561 }
00562 delete [] positive;
00563 delete [] negative;
00564 return s;
00565 }
00566
00567 int32_t DA_S3VM(
00568 struct data *Data, struct options *Options, struct vector_double *Weights,
00569 struct vector_double *Outputs)
00570 {
00571 float64_t T = DA_INIT_TEMP*Options->lambda_u;
00572 int32_t iter1 = 0, iter2 =0;
00573 float64_t *p = new float64_t[Data->u];
00574 float64_t *q = new float64_t[Data->u];
00575 float64_t *g = new float64_t[Data->u];
00576 float64_t F,F_min;
00577 float64_t *w_min = new float64_t[Data->n];
00578 float64_t *o_min = new float64_t[Data->m];
00579 float64_t *w = Weights->vec;
00580 float64_t *o = Outputs->vec;
00581 float64_t kl_divergence = 1.0;
00582
00583 SG_SDEBUG("Initializing weights, p");
00584 for (int32_t i=0;i<Data->u; i++)
00585 p[i] = Options->R;
00586
00587 int32_t *JU = new int32_t[Data->u];
00588 int32_t j=0;
00589 for(int32_t i=0;i<Data->m;i++)
00590 {
00591 if(Data->Y[i]==0.0)
00592 {JU[j]=i;j++;}
00593 }
00594 float64_t H = entropy(p,Data->u);
00595 optimize_w(Data,p,Options,Weights,Outputs,0);
00596 F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
00597 F_min = F;
00598 for (int32_t i=0;i<Weights->d;i++)
00599 w_min[i]=w[i];
00600 for (int32_t i=0;i<Outputs->d;i++)
00601 o_min[i]=o[i];
00602 while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon))
00603 {
00604 iter1++;
00605 iter2=0;
00606 kl_divergence=1.0;
00607 while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon))
00608 {
00609 iter2++;
00610 for (int32_t i=0;i<Data->u;i++)
00611 {
00612 q[i]=p[i];
00613 g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]]< -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]])));
00614 }
00615 SG_SDEBUG("Optimizing p.\n");
00616 optimize_p(g,Data->u,T,Options->R,p);
00617 kl_divergence=KL(p,q,Data->u);
00618 SG_SDEBUG("Optimizing weights\n");
00619 optimize_w(Data,p,Options,Weights,Outputs,1);
00620 F = transductive_cost(norm_square(Weights),Data->Y,Outputs->vec,Outputs->d,Options->lambda,Options->lambda_u);
00621 if(F < F_min)
00622 {
00623 F_min = F;
00624 for (int32_t i=0;i<Weights->d;i++)
00625 w_min[i]=w[i];
00626 for (int32_t i=0;i<Outputs->d;i++)
00627 o_min[i]=o[i];
00628 }
00629 SG_SDEBUG("***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F);
00630 }
00631 H = entropy(p,Data->u);
00632 SG_SDEBUG("***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H);
00633 T = T/DA_ANNEALING_RATE;
00634 }
00635 for (int32_t i=0;i<Weights->d;i++)
00636 w[i]=w_min[i];
00637 for (int32_t i=0;i<Outputs->d;i++)
00638 o[i]=o_min[i];
00639
00640 delete [] p;
00641 delete [] q;
00642 delete [] g;
00643 delete [] JU;
00644 delete [] w_min;
00645 delete [] o_min;
00646 SG_SINFO("(min) Objective Value = %f", F_min);
00647 return 1;
00648 }
00649
00650 int32_t optimize_w(
00651 const struct data *Data, const float64_t *p, struct options *Options,
00652 struct vector_double *Weights, struct vector_double *Outputs, int32_t ini)
00653 {
00654 int32_t i,j;
00655 CDotFeatures* features=Data->features;
00656 int32_t n = Data->n;
00657 int32_t m = Data->m;
00658 int32_t u = Data->u;
00659 float64_t lambda = Options->lambda;
00660 float64_t epsilon;
00661 float64_t *w = Weights->vec;
00662 float64_t *o = new float64_t[m+u];
00663 float64_t *Y = new float64_t[m+u];
00664 float64_t *C = new float64_t[m+u];
00665 int32_t *labeled_indices = new int32_t[m];
00666 float64_t F_old = 0.0;
00667 float64_t F = 0.0;
00668 float64_t diff=0.0;
00669 float64_t lambda_u_by_u = Options->lambda_u/u;
00670 vector_int *ActiveSubset = new vector_int[1];
00671 ActiveSubset->vec = new int32_t[m];
00672 ActiveSubset->d = m;
00673
00674 if(ini==0)
00675 {
00676 epsilon=BIG_EPSILON;
00677 Options->cgitermax=SMALL_CGITERMAX;
00678 Options->epsilon=BIG_EPSILON;}
00679 else {epsilon = Options->epsilon;}
00680
00681 for(i=0;i<n;i++) F+=w[i]*w[i];
00682 F=lambda*F;
00683 int32_t active=0;
00684 int32_t inactive=m-1;
00685 float64_t temp1;
00686 float64_t temp2;
00687
00688 j = 0;
00689 for(i=0; i<m ; i++)
00690 {
00691 o[i]=Outputs->vec[i];
00692 if(Data->Y[i]==0.0)
00693 {
00694 labeled_indices[i]=0;
00695 o[m+j]=o[i];
00696 Y[i]=1;
00697 Y[m+j]=-1;
00698 C[i]=lambda_u_by_u*p[j];
00699 C[m+j]=lambda_u_by_u*(1-p[j]);
00700 ActiveSubset->vec[active]=i;
00701 active++;
00702 diff = 1 - CMath::abs(o[i]);
00703 if(diff>0)
00704 {
00705 Data->Y[i] = 2*p[j]-1;
00706 Data->C[i] = lambda_u_by_u;
00707 temp1 = (1 - o[i]);
00708 temp2 = (1 + o[i]);
00709 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
00710 }
00711 else
00712 {
00713 if(o[i]>0)
00714 {
00715 Data->Y[i] = -1.0;
00716 Data->C[i] = C[m+j];
00717 }
00718 else
00719 {
00720 Data->Y[i] = 1.0;
00721 Data->C[i] = C[i];
00722 }
00723 temp1 = (1-Data->Y[i]*o[i]);
00724 F+= Data->C[i]*temp1*temp1;
00725 }
00726 j++;
00727 }
00728 else
00729 {
00730 labeled_indices[i]=1;
00731 Y[i]=Data->Y[i];
00732 C[i]=1.0/Data->l;
00733 Data->C[i]=1.0/Data->l;
00734 diff=1-Data->Y[i]*o[i];
00735 if(diff>0)
00736 {
00737 ActiveSubset->vec[active]=i;
00738 active++;
00739 F+=Data->C[i]*diff*diff;
00740 }
00741 else
00742 {
00743 ActiveSubset->vec[inactive]=i;
00744 inactive--;
00745 }
00746 }
00747 }
00748 F=0.5*F;
00749 ActiveSubset->d=active;
00750 int32_t iter=0;
00751 int32_t opt=0;
00752 int32_t opt2=0;
00753 vector_double *Weights_bar = new vector_double[1];
00754 vector_double *Outputs_bar = new vector_double[1];
00755 float64_t *w_bar = new float64_t[n];
00756 float64_t *o_bar = new float64_t[m+u];
00757 Weights_bar->vec=w_bar;
00758 Outputs_bar->vec=o_bar;
00759 Weights_bar->d=n;
00760 Outputs_bar->d=m;
00761 float64_t delta=0.0;
00762 float64_t t=0.0;
00763 int32_t ii = 0;
00764 while(iter<MFNITERMAX)
00765 {
00766 iter++;
00767 SG_SDEBUG("L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F);
00768 for(i=n; i-- ;)
00769 w_bar[i]=w[i];
00770
00771 for(i=m+u; i-- ;)
00772 o_bar[i]=o[i];
00773 opt=CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
00774 for(i=active; i < m; i++)
00775 {
00776 ii=ActiveSubset->vec[i];
00777 t=features->dense_dot(ii, w_bar, n-1);
00778 t+=Options->bias*w_bar[n-1];
00779
00780 o_bar[ii]=t;
00781 }
00782
00783 j=0;
00784 for(i=0; i<m;i++)
00785 {
00786 if(labeled_indices[i]==0)
00787 {o_bar[m+j]=o_bar[i]; j++;};
00788 }
00789 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
00790 opt2=1;
00791 for(i=0; i < m ;i++)
00792 {
00793 ii=ActiveSubset->vec[i];
00794 if(i<active)
00795 {
00796 if(labeled_indices[ii]==1)
00797 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+epsilon));
00798 else
00799 {
00800 if(CMath::abs(o[ii])<1)
00801 opt2=(opt2 && (CMath::abs(o_bar[ii])<=1+epsilon));
00802 else
00803 opt2=(opt2 && (CMath::abs(o_bar[ii])>=1-epsilon));
00804 }
00805 }
00806 else
00807 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-epsilon));
00808 if(opt2==0) break;
00809 }
00810 if(opt && opt2)
00811 {
00812 if(epsilon==BIG_EPSILON)
00813 {
00814 epsilon=EPSILON;
00815 Options->epsilon=EPSILON;
00816 SG_SDEBUG( "epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON);
00817 continue;
00818 }
00819 else
00820 {
00821 for(i=n; i-- ;)
00822 w[i]=w_bar[i];
00823 for(i=m; i-- ;)
00824 Outputs->vec[i]=o_bar[i];
00825 for(i=m; i-- ;)
00826 {
00827 if(labeled_indices[i]==0)
00828 Data->Y[i]=0.0;
00829 }
00830 delete[] ActiveSubset->vec;
00831 delete[] ActiveSubset;
00832 delete[] o_bar;
00833 delete[] w_bar;
00834 delete[] o;
00835 delete[] Weights_bar;
00836 delete[] Outputs_bar;
00837 delete[] Y;
00838 delete[] C;
00839 delete[] labeled_indices;
00840 SG_SINFO("L2_SVM_MFN converged in %d iteration(s)", iter);
00841 return 1;
00842 }
00843 }
00844
00845 delta=line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u);
00846 SG_SDEBUG("LINE_SEARCH delta = %f", delta);
00847 F_old=F;
00848 F=0.0;
00849 for(i=0;i<n;i++) {w[i]+=delta*(w_bar[i]-w[i]); F+=w[i]*w[i];}
00850 F=lambda*F;
00851 j=0;
00852 active=0;
00853 inactive=m-1;
00854 for(i=0; i<m ; i++)
00855 {
00856 o[i]+=delta*(o_bar[i]-o[i]);
00857 if(labeled_indices[i]==0)
00858 {
00859 o[m+j]=o[i];
00860 ActiveSubset->vec[active]=i;
00861 active++;
00862 diff = 1 - CMath::abs(o[i]);
00863 if(diff>0)
00864 {
00865 Data->Y[i] = 2*p[j]-1;
00866 Data->C[i] = lambda_u_by_u;
00867 temp1 = (1 - o[i]);
00868 temp2 = (1 + o[i]);
00869 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
00870 }
00871 else
00872 {
00873 if(o[i]>0)
00874 {
00875 Data->Y[i] = -1;
00876 Data->C[i] = C[m+j];
00877 }
00878 else
00879 {
00880 Data->Y[i] = +1;
00881 Data->C[i] = C[i];
00882 }
00883 temp1=(1-Data->Y[i]*o[i]);
00884 F+= Data->C[i]*temp1*temp1;
00885 }
00886 j++;
00887 }
00888 else
00889 {
00890 diff=1-Data->Y[i]*o[i];
00891 if(diff>0)
00892 {
00893 ActiveSubset->vec[active]=i;
00894 active++;
00895 F+=Data->C[i]*diff*diff;
00896 }
00897 else
00898 {
00899 ActiveSubset->vec[inactive]=i;
00900 inactive--;
00901 }
00902 }
00903 }
00904 F=0.5*F;
00905 ActiveSubset->d=active;
00906 if(CMath::abs(F-F_old)<EPSILON)
00907 break;
00908 }
00909 for(i=m; i-- ;)
00910 {
00911 Outputs->vec[i]=o[i];
00912 if(labeled_indices[i]==0)
00913 Data->Y[i]=0.0;
00914 }
00915 delete[] ActiveSubset->vec;
00916 delete[] labeled_indices;
00917 delete[] ActiveSubset;
00918 delete[] o_bar;
00919 delete[] w_bar;
00920 delete[] o;
00921 delete[] Weights_bar;
00922 delete[] Outputs_bar;
00923 delete[] Y;
00924 delete[] C;
00925 SG_SINFO("L2_SVM_MFN converged in %d iterations", iter);
00926 return 0;
00927 }
00928
00929 void optimize_p(
00930 const float64_t* g, int32_t u, float64_t T, float64_t r, float64_t* p)
00931 {
00932 int32_t iter=0;
00933 float64_t epsilon=1e-10;
00934 int32_t maxiter=500;
00935 float64_t nu_minus=g[0];
00936 float64_t nu_plus=g[0];
00937 for (int32_t i=0;i<u;i++)
00938 {
00939 if(g[i]<nu_minus) nu_minus=g[i];
00940 if(g[i]>nu_plus) nu_plus=g[i];
00941 };
00942
00943 float64_t b=T*log((1-r)/r);
00944 nu_minus-=b;
00945 nu_plus-=b;
00946 float64_t nu=(nu_plus+nu_minus)/2;
00947 float64_t Bnu=0.0;
00948 float64_t BnuPrime=0.0;
00949 float64_t s=0.0;
00950 float64_t tmp=0.0;
00951 for (int32_t i=0;i<u;i++)
00952 {
00953 s=exp((g[i]-nu)/T);
00954 if(!(CMath::is_infinity(s)))
00955 {
00956 tmp=1.0/(1.0+s);
00957 Bnu+=tmp;
00958 BnuPrime+=s*tmp*tmp;
00959 }
00960 }
00961 Bnu=Bnu/u;
00962 Bnu-=r;
00963 BnuPrime=BnuPrime/(T*u);
00964 float64_t nuHat=0.0;
00965 while((CMath::abs(Bnu)>epsilon) && (iter < maxiter))
00966 {
00967 iter++;
00968 if(CMath::abs(BnuPrime)>0.0)
00969 nuHat=nu-Bnu/BnuPrime;
00970 if((CMath::abs(BnuPrime) > 0.0) | (nuHat>nu_plus) | (nuHat < nu_minus))
00971 nu=(nu_minus+nu_plus)/2.0;
00972 else
00973 nu=nuHat;
00974 Bnu=0.0;
00975 BnuPrime=0.0;
00976 for(int32_t i=0;i<u;i++)
00977 {
00978 s=exp((g[i]-nu)/T);
00979 if(!(CMath::is_infinity(s)))
00980 {
00981 tmp=1.0/(1.0+s);
00982 Bnu+=tmp;
00983 BnuPrime+=s*tmp*tmp;
00984 }
00985 }
00986 Bnu=Bnu/u;
00987 Bnu-=r;
00988 BnuPrime=BnuPrime/(T*u);
00989 if(Bnu<0)
00990 nu_minus=nu;
00991 else
00992 nu_plus=nu;
00993 if(CMath::abs(nu_minus-nu_plus)<epsilon)
00994 break;
00995 }
00996 if(CMath::abs(Bnu)>epsilon)
00997 SG_SWARNING("Warning (Root): root not found to required precision\n");
00998
00999 for (int32_t i=0;i<u;i++)
01000 {
01001 s=exp((g[i]-nu)/T);
01002 if(CMath::is_infinity(s)) p[i]=0.0;
01003 else p[i]=1.0/(1.0+s);
01004 }
01005 SG_SINFO(" root (nu) = %f B(nu) = %f", nu, Bnu);
01006 }
01007
01008 float64_t transductive_cost(
01009 float64_t normWeights, float64_t *Y, float64_t *Outputs, int32_t m,
01010 float64_t lambda, float64_t lambda_u)
01011 {
01012 float64_t F1=0.0,F2=0.0, o=0.0, y=0.0;
01013 int32_t u=0,l=0;
01014 for (int32_t i=0;i<m;i++)
01015 {
01016 o=Outputs[i];
01017 y=Y[i];
01018 if(y==0.0)
01019 {F1 += CMath::abs(o) > 1 ? 0 : (1 - CMath::abs(o))*(1 - CMath::abs(o)); u++;}
01020 else
01021 {F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o); l++;}
01022 }
01023 float64_t F;
01024 F = 0.5*(lambda*normWeights + lambda_u*F1/u + F2/l);
01025 return F;
01026 }
01027
01028 float64_t entropy(const float64_t *p, int32_t u)
01029 {
01030 float64_t h=0.0;
01031 float64_t q=0.0;
01032 for (int32_t i=0;i<u;i++)
01033 {
01034 q=p[i];
01035 if(q>0 && q<1)
01036 h+= -(q*CMath::log2(q) + (1-q)*CMath::log2(1-q));
01037 }
01038 return h/u;
01039 }
01040
01041 float64_t KL(const float64_t *p, const float64_t *q, int32_t u)
01042 {
01043 float64_t h=0.0;
01044 float64_t p1=0.0;
01045 float64_t q1=0.0;
01046 float64_t g=0.0;
01047 for (int32_t i=0;i<u;i++)
01048 {
01049 p1=p[i];
01050 q1=q[i];
01051 if(p1>1-1e-8) p1-=1e-8;
01052 if(p1<1-1e-8) p1+=1e-8;
01053 if(q1>1-1e-8) q1-=1e-8;
01054 if(q1<1-1e-8) q1+=1e-8;
01055 g= (p1*CMath::log2(p1/q1) + (1-p1)*CMath::log2((1-p1)/(1-q1)));
01056 if(CMath::abs(g)<1e-12 || CMath::is_nan(g)) g=0.0;
01057 h+=g;
01058 }
01059 return h/u;
01060 }
01061
01062
01063 float64_t norm_square(const vector_double *A)
01064 {
01065 float64_t x=0.0, t=0.0;
01066 for(int32_t i=0;i<A->d;i++)
01067 {
01068 t=A->vec[i];
01069 x+=t*t;
01070 }
01071 return x;
01072 }
01073
01074 void initialize(struct vector_double *A, int32_t k, float64_t a)
01075 {
01076 float64_t *vec = new float64_t[k];
01077 for (int32_t i=0;i<k;i++)
01078 vec[i]=a;
01079 A->vec = vec;
01080 A->d = k;
01081 return;
01082 }
01083
01084 void initialize(struct vector_int *A, int32_t k)
01085 {
01086 int32_t *vec = new int32_t[k];
01087 for(int32_t i=0;i<k;i++)
01088 vec[i]=i;
01089 A->vec = vec;
01090 A->d = k;
01091 return;
01092 }
01093
01094 void GetLabeledData(struct data *D, const struct data *Data)
01095 {
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
01108
01109
01110
01111
01112
01113
01114
01115
01116
01117
01118
01119
01120
01121
01122
01123
01124
01125
01126
01127
01128
01129
01130
01131
01132
01133
01134
01135 }
01136 }