// Fragment of the SSL training dispatcher (non-contiguous excerpt: the numbers
// embedded at the start of each line are the ORIGINAL file's line numbers, and
// lines between fragments are elided — do not assume adjacent lines touch).
// Visible behavior: logs which solver runs, invokes it, and warns when the
// solver reports non-optimal termination. Presumably a switch on an
// Options->algo field selects the branch — TODO confirm in the full file.
34 struct options *Options,
35 struct vector_double *Weights,
36 struct vector_double *Outputs)
// Scratch subset descriptor passed to the CGLS calls below.
41 vector_int *Subset =
SG_MALLOC(vector_int, 1);
44 int32_t optimality = 0;
// Branch: regularized least-squares regression via CGLS.
48 SG_SINFO(
"Regularized Least Squares Regression (CGLS)\n");
49 optimality=
CGLS(Data,Options,Subset,Weights,Outputs);
// Branch: regularized least-squares classification — same CGLS entry point.
52 SG_SINFO(
"Regularized Least Squares Classification (CGLS)\n");
53 optimality=
CGLS(Data,Options,Subset,Weights,Outputs);
// Branch: modified finite Newton L2-SVM (trailing 0 is the 'ini' flag).
56 SG_SINFO(
"Modified Finite Newton L2-SVM (L2-SVM-MFN)\n");
57 optimality=
L2_SVM_MFN(Data,Options,Weights,Outputs,0);
// Branch: transductive L2-SVM.
60 SG_SINFO(
"Transductive L2-SVM (TSVM)\n");
61 optimality=
TSVM_MFN(Data,Options,Weights,Outputs);
// Branch: deterministic-annealing semi-supervised L2-SVM.
64 SG_SINFO(
"Deterministic Annealing Semi-supervised L2-SVM (DAS3VM)\n");
65 optimality=
DA_S3VM(Data,Options,Weights,Outputs);
// Fall-through when the selected solver did not reach its optimum.
72 SG_SWARNING(
"SSL-Algorithm terminated without reaching optimum.\n");
// Fragment of CGLS: conjugate-gradient least squares over the active subset
// (Subset->vec indexes the 'active' examples). Non-contiguous excerpt —
// original-file lines between fragments are elided.
80 const struct data *Data,
const struct options *Options,
81 const struct vector_int *Subset,
struct vector_double *Weights,
82 struct vector_double *Outputs)
// Active set size and index array.
87 int32_t active = Subset->d;
88 int32_t *J = Subset->vec;
94 int32_t cgitermax = Options->cgitermax;
// Initial residual: z = C .* (Y - o) over active examples (ii presumably J[i]).
102 for (int32_t i = active ; i-- ;){
104 z[i] = C[ii]*(Y[ii] - o[ii]);
107 for (int32_t i = n ; i-- ;)
// Accumulate r = A^T z; the last component carries the bias term.
109 for (
register int32_t j=0; j < active; j++)
112 r[n-1]+=Options->bias*z[j];
// Regularization contribution: r -= lambda * beta.
116 for (int32_t i = n ; i-- ;)
118 r[i] -= lambda*beta[i];
129 int32_t optimality = 0;
// Main CG iteration loop.
132 while(cgiter < cgitermax)
// NOTE(review): 'register' is deprecated (removed in C++17) — harmless but dated.
137 register int32_t i,j;
// q = A p over active examples, with bias folded into the last weight.
139 for (i=0; i < active; i++)
143 t+=Options->bias*p[n-1];
145 omega_q += C[ii]*t*t;
// Step length: gamma = omega1 / (lambda*|p|^2 + q^T C q).
147 gamma = omega1/(lambda*omega_p + omega_q);
148 inv_omega2 = 1/omega1;
// beta += gamma*p; residual z updated below.
152 beta[i] += gamma*p[i];
155 for (i = active ; i-- ;)
159 z[i] -= gamma*C[ii]*q[i];
// Recompute r = A^T z - lambda*beta for the convergence test.
162 for (j=0; j < active; j++)
167 r[n-1]+=Options->bias*t;
172 r[i] -= lambda*beta[i];
// Convergence: relative residual below epsilon^2 * |z|^2 (presumably; confirm).
175 if(omega1 < epsilon2*omega_z)
// Polak-Ribiere-style direction update: p = r + scale*p.
181 scale=omega1*inv_omega2;
184 p[i] = r[i] + p[i]*scale;
185 omega_p += p[i]*p[i];
189 SG_SINFO(
"CGLS converged in %d iteration(s)", cgiter);
// Fragment of L2_SVM_MFN: modified finite Newton method for L2-SVM.
// Outer loop alternates CGLS on the active set with a line search, then
// re-partitions examples into active (margin violators) / inactive sets.
// Non-contiguous excerpt — elided lines between fragments.
199 const struct data *Data,
struct options *Options,
200 struct vector_double *Weights,
struct vector_double *Outputs,
// Active-set bookkeeping: first 'active' entries are violators, tail holds
// the inactive ones (filled from index m-1 downward).
216 vector_int *ActiveSubset =
SG_MALLOC(vector_int, 1);
217 ActiveSubset->vec =
SG_MALLOC(int32_t, m);
// Speedup heuristic: start with few CG iterations and a loose epsilon,
// tightened later once the loose problem converges (see lines 292-296).
222 Options->cgitermax=SMALL_CGITERMAX;
223 Options->epsilon=BIG_EPSILON;
225 else {epsilon = Options->epsilon;}
// Objective F starts from the regularizer ||w||^2.
226 for (int32_t i=0;i<n;i++) F+=w[i]*w[i];
229 int32_t inactive=m-1;
// Initial partition: margin violators go to the front, others to the back;
// each violator contributes 0.5*C*diff^2 to F.
230 for (int32_t i=0; i<m ; i++)
235 ActiveSubset->vec[active]=i;
237 F+=0.5*C[i]*diff*diff;
241 ActiveSubset->vec[inactive]=i;
245 ActiveSubset->d=active;
// Newton candidate point (w_bar, o_bar) solved by CGLS each iteration.
249 vector_double *Weights_bar =
SG_MALLOC(vector_double, 1);
250 vector_double *Outputs_bar =
SG_MALLOC(vector_double, 1);
253 Weights_bar->vec=w_bar;
254 Outputs_bar->vec=o_bar;
260 while(iter<MFNITERMAX)
263 SG_SDEBUG(
"L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)\n", iter, active, F);
264 for (int32_t i=n; i-- ;)
266 for (int32_t i=m; i-- ;)
// Solve the least-squares subproblem restricted to the active set.
269 opt=
CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
// Recompute outputs for the inactive examples at the candidate point.
270 for(
register int32_t i=active; i < m; i++)
272 ii=ActiveSubset->vec[i];
275 t+=Options->bias*w_bar[n-1];
// After the first pass, restore the full CG iteration budget.
279 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
// Optimality check: actives must satisfy Y*o_bar <= 1+eps, inactives >= 1-eps.
281 for (int32_t i=0;i<m;i++)
283 ii=ActiveSubset->vec[i];
285 opt2=(opt2 && (Y[ii]*o_bar[ii]<=1+
epsilon));
287 opt2=(opt2 && (Y[ii]*o_bar[ii]>=1-
epsilon));
// Heuristic 2: loose-epsilon problem converged — tighten and continue.
292 if(epsilon==BIG_EPSILON)
295 Options->epsilon=EPSILON;
296 SG_SDEBUG(
"epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f", BIG_EPSILON , EPSILON);
301 for (int32_t i=n; i-- ;)
303 for (int32_t i=m; i-- ;)
311 SG_SINFO(
"L2_SVM_MFN converged (optimality) in %d", iter);
// Line search between (w,o) and (w_bar,o_bar); delta in [0,1] presumably.
316 SG_SDEBUG(
"LINE_SEARCH delta = %f\n", delta);
319 for (int32_t i=n; i-- ;) {
320 w[i]+=delta*(w_bar[i]-w[i]);
// Update outputs and re-partition the active set at the new point.
326 for (int32_t i=0; i<m ; i++)
328 o[i]+=delta*(o_bar[i]-o[i]);
332 ActiveSubset->vec[active]=i;
334 F+=0.5*C[i]*diff*diff;
338 ActiveSubset->vec[inactive]=i;
342 ActiveSubset->d=active;
345 SG_SINFO(
"L2_SVM_MFN converged (rel. criterion) in %d iterations", iter);
355 SG_SINFO(
"L2_SVM_MFN converged (max iter exceeded) in %d iterations", iter);
// Fragment of line_search: exact line search along (w_bar - w) for the
// piecewise-quadratic L2-SVM objective. Collects breakpoints ('deltas') where
// examples enter/leave the margin, sorts them, and walks the segments until
// the directional derivative changes sign. Non-contiguous excerpt.
// Regularizer part of the directional derivative at both endpoints.
372 for(int32_t i=d; i--; )
376 omegaR+=w_bar[i]*diff;
378 omegaL=lambda*omegaL;
379 omegaR=lambda*omegaR;
// Loss part: accumulate L (derivative at 0) and R (derivative at 1).
383 for (int32_t i=0;i<l;i++)
387 diff=C[i]*(o_bar[i]-o[i]);
389 R+=(o_bar[i]-Y[i])*diff;
// Breakpoints: delta where example i crosses the margin, Y*(o_bar-o) != 0.
396 for(int32_t i=0;i<l;i++)
398 diff=Y[i]*(o_bar[i]-o[i]);
404 deltas[p].delta=(1-Y[i]*o[i])/diff;
414 deltas[p].delta=(1-Y[i]*o[i])/diff;
421 std::sort(deltas,deltas+p);
// Walk sorted breakpoints; delta_prime is the derivative at this breakpoint.
423 for (int32_t i=0;i<p;i++)
425 delta_prime = L + deltas[i].delta*(R-L);
// Update L/R as the crossing example flips in/out of the loss (s = +/-1).
429 diff=(deltas[i].s)*C[ii]*(o_bar[ii]-o[ii]);
430 L+=diff*(o[ii]-Y[ii]);
431 R+=diff*(o_bar[ii]-Y[ii]);
// Fragment of TSVM_MFN: transductive SVM by label-switching annealing.
// Trains on labeled data first, assigns tentative labels to unlabeled points
// by thresholding outputs, then anneals lambda_0 up to lambda_u, switching
// label pairs each round. Non-contiguous excerpt — elided lines between
// fragments.
438 const struct data *Data,
struct options *Options,
439 struct vector_double *Weights,
struct vector_double *Outputs)
// Stage 1: supervised warm start on the labeled subset only.
442 struct data *Data_Labeled =
SG_MALLOC(data, 1);
443 struct vector_double *Outputs_Labeled =
SG_MALLOC(vector_double, 1);
445 SG_SDEBUG(
"Initializing weights, unknown labels");
447 L2_SVM_MFN(Data_Labeled, Options, Weights,Outputs_Labeled,0);
// JU collects the indices of the unlabeled examples.
453 int32_t *JU =
SG_MALLOC(int32_t, Data->u);
456 for (int32_t i=0;i<Data->m;i++)
// Unlabeled example: score with warm-start weights; small initial weight C.
460 t=Data->features->dense_dot(i, Weights->vec, Data->n-1);
461 t+=Options->bias*Weights->vec[Data->n-1];
464 Data->C[i]=lambda_0*1.0/Data->u;
// Labeled example: copy warm-start output; standard weight 1/l.
471 Outputs->vec[i]=Outputs_Labeled->vec[p];
472 Data->C[i]=1.0/Data->l;
// Threshold so that a fraction R of unlabeled points get the positive label.
// NOTE(review): the nth_element pivot uses int32_t((1-R)*u - 1) (the -1 inside
// the truncating cast) while 'thresh' dereferences int32_t((1-R)*u) - 1 (the
// -1 outside) — the two indices can differ by one; confirm against the full
// file / reference implementation.
476 std::nth_element(ou,ou+int32_t((1-Options->R)*Data->u-1),ou+Data->u);
477 float64_t thresh=*(ou+int32_t((1-Options->R)*Data->u)-1);
479 for (int32_t i=0;i<Data->u;i++)
481 if(Outputs->vec[JU[i]]>thresh)
486 for (int32_t i=0;i<Data->n;i++)
488 for (int32_t i=0;i<Data->m;i++)
491 int32_t num_switches=0;
493 int32_t last_round=0;
// Annealing loop: retrain + switch labels, then raise lambda_0.
494 while(lambda_0 <= Options->lambda_u)
501 SG_SDEBUG(
"****** lambda_0 = %f iteration = %d ************************************\n", lambda_0, iter2);
// NOTE(review): the format string below has a %d but no matching argument on
// the visible lines — presumably the per-round switch count is missing;
// confirm in the full file (varargs mismatch is undefined behavior).
502 SG_SDEBUG(
"Optimizing unknown labels. switched %d labels.\n");
507 if(last_round==1)
break;
// Geometric annealing schedule, clamped to lambda_u on the final round.
508 lambda_0=TSVM_ANNEALING_RATE*lambda_0;
509 if(lambda_0 >= Options->lambda_u) {lambda_0 = Options->lambda_u; last_round=1;}
510 for (int32_t i=0;i<Data->u;i++)
511 Data->C[JU[i]]=lambda_0*1.0/Data->u;
512 SG_SDEBUG(
"****** lambda0 increased to %f%% of lambda_u = %f ************************\n", lambda_0*100/Options->lambda_u, Options->lambda_u);
516 SG_SDEBUG(
"Total Number of Switches = %d\n", num_switches);
// Restore the unlabeled examples' labels to 0 before returning.
518 for (int32_t i=0;i<Data->u;i++) Data->Y[JU[i]] = 0.0;
// Fragment of the label-switching helper used by TSVM_MFN: finds margin-
// violating positive/negative unlabeled examples, sorts each side by how
// badly it violates, and swaps labels of the worst pairs. Non-contiguous
// excerpt — elided lines between fragments.
// Count candidates on each side (inside the margin for their current label).
529 for (int32_t i=0;i<u;i++)
531 if((Y[JU[i]]>0) && (o[JU[i]]<1.0)) npos++;
532 if((Y[JU[i]]<0) && (-o[JU[i]]<1.0)) nneg++;
// Collect (score, index) records for both sides.
539 for (int32_t i=0;i<u;i++)
542 if((Y[ii]>0.0) && (o[ii]<1.0)) {
543 positive[p].delta=o[ii];
544 positive[p].index=ii;
547 if((Y[ii]<0.0) && (-o[ii]<1.0))
549 negative[n].delta=-o[ii];
550 negative[n].index=ii;
// Worst violators first after sorting by delta.
554 std::sort(positive,positive+npos);
555 std::sort(negative,negative+nneg);
// NOTE(review): '||' short-circuits left-to-right, so positive[s].delta and
// negative[s].delta are read BEFORE the (s>=npos)/(s>=nneg) guards — when s
// exceeds either count this indexes past the filled portion of the arrays.
// The bounds tests should come first; confirm array capacities in full file.
560 if((s>=S) || (positive[s].
delta>=-negative[s].
delta) || (s>=npos) || (s>=nneg))
// Swap one pair: demote the worst positive, promote the worst negative.
562 Y[positive[s].index]=-1.0;
563 Y[negative[s].index]= 1.0;
// Fragment of DA_S3VM: deterministic-annealing semi-supervised SVM.
// Outer loop lowers temperature T; inner loop alternates optimizing the
// label distribution p (via gradients g and a KL-divergence stopping test)
// with optimizing w, tracking the best objective F_min. Non-contiguous
// excerpt — elided lines between fragments.
571 struct data *Data,
struct options *Options,
struct vector_double *Weights,
572 struct vector_double *Outputs)
// Initial temperature scaled by lambda_u.
574 float64_t T = DA_INIT_TEMP*Options->lambda_u;
575 int32_t iter1 = 0, iter2 =0;
587 for (int32_t i=0;i<Data->u; i++)
// JU: indices of the unlabeled examples.
590 int32_t *JU =
SG_MALLOC(int32_t, Data->u);
592 for(int32_t i=0;i<Data->m;i++)
// Snapshot best-so-far weights/outputs (copied back at lines 638/640).
601 for (int32_t i=0;i<Weights->d;i++)
603 for (int32_t i=0;i<Outputs->d;i++)
// Outer annealing loop: stop when entropy H of p drops below epsilon.
605 while((iter1 < DA_OUTER_ITERMAX) && (H > Options->epsilon))
// Inner loop: alternate p- and w-steps until the KL between successive
// distributions is small.
610 while((iter2 < DA_INNER_ITERMAX) && (kl_divergence > Options->epsilon))
613 for (int32_t i=0;i<Data->u;i++)
// g[i]: difference of squared hinge losses for labeling i as +1 vs -1
// (zero beyond the respective margins), scaled by lambda_u.
616 g[i] = Options->lambda_u*((o[JU[i]] > 1 ? 0 : (1 - o[JU[i]])*(1 - o[JU[i]])) - (o[JU[i]]< -1 ? 0 : (1 + o[JU[i]])*(1 + o[JU[i]])));
620 kl_divergence=
KL(p,q,Data->u);
// Keep the incumbent if this temperature improved the objective.
627 for (int32_t i=0;i<Weights->d;i++)
629 for (int32_t i=0;i<Outputs->d;i++)
632 SG_SDEBUG(
"***** outer_iter = %d T = %g inner_iter = %d kl = %g cost = %g *****\n",iter1,T,iter2,kl_divergence,F);
635 SG_SDEBUG(
"***** Finished outer_iter = %d T = %g Entropy = %g ***\n", iter1,T,H);
// Cool: divide T by the annealing rate each outer iteration.
636 T = T/DA_ANNEALING_RATE;
// Restore the best snapshot before returning.
638 for (int32_t i=0;i<Weights->d;i++)
640 for (int32_t i=0;i<Outputs->d;i++)
649 SG_SINFO(
"(min) Objective Value = %f", F_min);
// Fragment of the DA_S3VM w-step (MFN variant weighted by the label
// distribution p): each unlabeled example i appears twice — once as +1 with
// weight lambda_u/u * p[j], once (at slot m+j) as -1 with weight
// lambda_u/u * (1-p[j]). Structure mirrors L2_SVM_MFN: CGLS on the active
// set, line search, re-partition. Non-contiguous excerpt — elided lines
// between fragments.
654 const struct data *Data,
const float64_t *p,
struct options *Options,
655 struct vector_double *Weights,
struct vector_double *Outputs, int32_t ini)
// labeled_indices[i]: 1 for labeled, 0 for unlabeled (set at 697/733).
668 int32_t *labeled_indices =
SG_MALLOC(int32_t, m);
672 float64_t lambda_u_by_u = Options->lambda_u/u;
673 vector_int *ActiveSubset =
SG_MALLOC(vector_int, 1);
674 ActiveSubset->vec =
SG_MALLOC(int32_t, m);
// Same speedup heuristic as L2_SVM_MFN: loose CG budget/epsilon at first.
680 Options->cgitermax=SMALL_CGITERMAX;
681 Options->epsilon=BIG_EPSILON;}
682 else {epsilon = Options->epsilon;}
// F starts from the regularizer ||w||^2.
684 for(i=0;i<n;i++) F+=w[i]*w[i];
687 int32_t inactive=m-1;
694 o[i]=Outputs->vec[i];
// Unlabeled example: split into the p / (1-p) weighted pair.
697 labeled_indices[i]=0;
701 C[i]=lambda_u_by_u*p[j];
702 C[m+j]=lambda_u_by_u*(1-p[j]);
703 ActiveSubset->vec[active]=i;
// Effective label for CGLS is the expectation 2p-1.
708 Data->Y[i] = 2*p[j]-1;
709 Data->C[i] = lambda_u_by_u;
// Objective: p-weighted mix of the two squared hinge losses.
712 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
726 temp1 = (1-Data->Y[i]*o[i]);
727 F+= Data->C[i]*temp1*temp1;
// Labeled example: standard weight 1/l, active if margin-violating.
733 labeled_indices[i]=1;
736 Data->C[i]=1.0/Data->l;
737 diff=1-Data->Y[i]*o[i];
740 ActiveSubset->vec[active]=i;
742 F+=Data->C[i]*diff*diff;
746 ActiveSubset->vec[inactive]=i;
752 ActiveSubset->d=active;
// Newton candidate point (w_bar, o_bar), as in L2_SVM_MFN.
756 vector_double *Weights_bar =
SG_MALLOC(vector_double, 1);
757 vector_double *Outputs_bar =
SG_MALLOC(vector_double, 1);
760 Weights_bar->vec=w_bar;
761 Outputs_bar->vec=o_bar;
767 while(iter<MFNITERMAX)
770 SG_SDEBUG(
"L2_SVM_MFN Iteration# %d (%d active examples, objective_value = %f)", iter, active, F);
776 opt=
CGLS(Data,Options,ActiveSubset,Weights_bar,Outputs_bar);
// Recompute candidate outputs for inactive examples.
777 for(i=active; i < m; i++)
779 ii=ActiveSubset->vec[i];
781 t+=Options->bias*w_bar[n-1];
// Mirror each unlabeled example's output into its (1-p) twin at slot m+j.
789 if(labeled_indices[i]==0)
790 {o_bar[m+j]=o_bar[i]; j++;};
792 if(ini==0) {Options->cgitermax=CGITERMAX; ini=1;};
// Optimality test; labeled examples are checked against their true label.
796 ii=ActiveSubset->vec[i];
799 if(labeled_indices[ii]==1)
800 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]<=1+
epsilon));
810 opt2=(opt2 && (Data->Y[ii]*o_bar[ii]>=1-
epsilon));
// Heuristic 2: tighten epsilon once the loose problem converges.
815 if(epsilon==BIG_EPSILON)
818 Options->epsilon=EPSILON;
819 SG_SDEBUG(
"epsilon = %f case converged (speedup heuristic 2). Continuing with epsilon=%f\n", BIG_EPSILON, EPSILON);
// Converged: publish candidate outputs.
827 Outputs->vec[i]=o_bar[i];
830 if(labeled_indices[i]==0)
843 SG_SINFO(
"L2_SVM_MFN converged in %d iteration(s)", iter);
// Not optimal: exact line search over the m+u doubled examples.
848 delta=
line_search(w,w_bar,lambda,o,o_bar,Y,C,n,m+u);
849 SG_SDEBUG(
"LINE_SEARCH delta = %f", delta);
852 for(i=0;i<n;i++) {w[i]+=delta*(w_bar[i]-w[i]); F+=w[i]*w[i];}
// Move outputs, rebuild F and the active/inactive partition at the new point.
859 o[i]+=delta*(o_bar[i]-o[i]);
860 if(labeled_indices[i]==0)
863 ActiveSubset->vec[active]=i;
868 Data->Y[i] = 2*p[j]-1;
869 Data->C[i] = lambda_u_by_u;
872 F+=lambda_u_by_u*(p[j]*temp1*temp1 + (1-p[j])*temp2*temp2);
886 temp1=(1-Data->Y[i]*o[i]);
887 F+= Data->C[i]*temp1*temp1;
893 diff=1-Data->Y[i]*o[i];
896 ActiveSubset->vec[active]=i;
898 F+=Data->C[i]*diff*diff;
902 ActiveSubset->vec[inactive]=i;
908 ActiveSubset->d=active;
// Iteration budget exhausted: publish current outputs.
914 Outputs->vec[i]=o[i];
915 if(labeled_indices[i]==0)
928 SG_SINFO(
"L2_SVM_MFN converged in %d iterations", iter);
// Fragment of the DA_S3VM p-step: solves for the Lagrange multiplier nu of
// the class-balance constraint by a safeguarded Newton/bisection root find on
// B(nu), then sets p[i] from a sigmoid of (g[i]-nu)/T. Non-contiguous
// excerpt — elided lines between fragments.
// Bracket [nu_minus, nu_plus] from the extremes of the gradients g.
940 for (int32_t i=0;i<u;i++)
942 if(g[i]<nu_minus) nu_minus=g[i];
943 if(g[i]>nu_plus) nu_plus=g[i];
// Evaluate B(nu) and B'(nu) at the current iterate.
954 for (int32_t i=0;i<u;i++)
966 BnuPrime=BnuPrime/(T*u);
// Newton step.
972 nuHat=nu-Bnu/BnuPrime;
// NOTE(review): this guard uses bitwise '|' on boolean subexpressions, and
// 'abs(BnuPrime) > 0.0' makes the bisection branch fire whenever the
// derivative is NONzero — a safeguarded Newton normally falls back to
// bisection when the derivative is ~0 (== 0.0) or nuHat escapes the bracket.
// As written, the Newton step is essentially never taken. Verify against the
// reference SVMlin solver before changing.
973 if((
CMath::abs(BnuPrime) > 0.0) | (nuHat>nu_plus) | (nuHat < nu_minus))
974 nu=(nu_minus+nu_plus)/2.0;
// Re-evaluate B and B' at the accepted nu.
979 for(int32_t i=0;i<u;i++)
991 BnuPrime=BnuPrime/(T*u);
1000 SG_SWARNING(
"Warning (Root): root not found to required precision\n");
// Final distribution: p[i] = 1/(1+exp((g[i]-nu)/T)) presumably; the 's'
// branch visible at 1006 covers one tail — confirm the other branch in the
// full file.
1002 for (int32_t i=0;i<u;i++)
1006 else p[i]=1.0/(1.0+s);
1008 SG_SINFO(
" root (nu) = %f B(nu) = %f", nu, Bnu);
// Fragments of the tail helpers: transductive cost, entropy, KL divergence,
// and Clear/deallocation loops. Non-contiguous excerpt — elided lines
// between fragments.
// transductive_cost: F2 accumulates squared hinge loss over labeled examples
// (l counts them); F combines regularizer, unlabeled loss F1, and F2.
1017 for (int32_t i=0;i<m;i++)
1024 {
F2 += y*o > 1 ? 0 : (1-y*o)*(1-y*o); l++;}
1027 F = 0.5*(lambda*normWeights + lambda_u*F1/u +
F2/l);
// entropy over the label distribution.
1035 for (int32_t i=0;i<u;i++)
// KL(p||q): clamp probabilities away from the boundaries before taking logs.
1050 for (int32_t i=0;i<u;i++)
// NOTE(review): the '+=' clamps test 'p1<1-1e-8' / 'q1<1-1e-8', which is true
// for almost every probability — presumably they were meant to be 'p1<1e-8' /
// 'q1<1e-8' (keep values away from 0, mirroring the away-from-1 clamp above).
// As written they perturb nearly all values by +1e-8. Confirm against the
// reference implementation before fixing.
1054 if(p1>1-1e-8) p1-=1e-8;
1055 if(p1<1-1e-8) p1+=1e-8;
1056 if(q1>1-1e-8) q1-=1e-8;
1057 if(q1<1-1e-8) q1+=1e-8;
// Clear helpers: element-wise release loops (A->d and k are the lengths).
1069 for(int32_t i=0;i<A->d;i++)
1080 for (int32_t i=0;i<k;i++)
1090 for(int32_t i=0;i<k;i++)