SHOGUN  v2.0.0
pr_loqo.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Purpose: solves quadratic programming problem for pattern recognition
8  * for support vectors
9  *
10  * Written (W) 1997-1998 Alex J. Smola
11  * Written (W) 1999-2009 Soeren Sonnenburg
12  * Written (W) 1999-2008 Gunnar Raetsch
13  * Copyright (C) 1997-2009 Fraunhofer Institute FIRST and Max-Planck-Society
14  */
15
16 #include <shogun/lib/common.h>
17 #include <shogun/io/SGIO.h>
21
22 #include <math.h>
23 #include <time.h>
24 #include <stdlib.h>
25 #include <stdio.h>
26
27 namespace shogun
28 {
29
30 #define PREDICTOR 1
31 #define CORRECTOR 2
32
33 /*****************************************************************
34  replace this by any other function that will exit gracefully
35  in a larger system
36  ***************************************************************/
37
38 void nrerror(char error_text[])
39 {
40  SG_SDEBUG("terminating optimizer - %s\n", error_text);
41  // exit(1);
42 }
43
44 /*****************************************************************
45  taken from numerical recipes and modified to accept pointers
46  moreover numerical recipes code seems to be buggy (at least the
47  ones on the web)
48
49  cholesky solver and backsubstitution
50  leaves upper right triangle intact (rows first order)
51  ***************************************************************/
52
53 #ifdef HAVE_LAPACK
54 bool choldc(float64_t* a, int32_t n, float64_t* p)
55 {
56  if (n<=0)
57  return false;
58
59  float64_t* a2=SG_MALLOC(float64_t, n*n);
60
61  for (int32_t i=0; i<n; i++)
62  {
63  for (int32_t j=0; j<n; j++)
64  a2[n*i+j]=a[n*i+j];
65  }
66
67  /* int for calling external lib */
68  int result=clapack_dpotrf(CblasRowMajor, CblasUpper, (int) n, a2, (int) n);
69
70  for (int32_t i=0; i<n; i++)
71  p[i]=a2[(n+1)*i];
72
73  for (int32_t i=0; i<n; i++)
74  {
75  for (int32_t j=i+1; j<n; j++)
76  {
77  a[n*j+i]=a2[n*i+j];
78  }
79  }
80
81  if (result>0)
82  SG_SDEBUG("Choldc failed, matrix not positive definite\n");
83
84  SG_FREE(a2);
85
86  return result==0;
87 }
88 #else
89 bool choldc(float64_t a[], int32_t n, float64_t p[])
90 {
91  void nrerror(char error_text[]);
92  int32_t i, j, k;
93  float64_t sum;
94
95  for (i = 0; i < n; i++)
96  {
97  for (j = i; j < n; j++)
98  {
99  sum=a[n*i + j];
100
101  for (k=i-1; k>=0; k--)
102  sum -= a[n*i + k]*a[n*j + k];
103
104  if (i == j)
105  {
106  if (sum <= 0.0)
107  {
108  SG_SDEBUG("Choldc failed, matrix not positive definite");
109  sum = 0.0;
110  return false;
111  }
112
113  p[i]=sqrt(sum);
114
115  }
116  else
117  a[n*j + i] = sum/p[i];
118  }
119  }
120
121  return true;
122 }
123 #endif
124
125 void cholsb(
126  float64_t a[], int32_t n, float64_t p[], float64_t b[], float64_t x[])
127 {
128  int32_t i, k;
129  float64_t sum;
130
131  for (i=0; i<n; i++) {
132  sum=b[i];
133  for (k=i-1; k>=0; k--) sum -= a[n*i + k]*x[k];
134  x[i]=sum/p[i];
135  }
136
137  for (i=n-1; i>=0; i--) {
138  sum=x[i];
139  for (k=i+1; k<n; k++) sum -= a[n*k + i]*x[k];
140  x[i]=sum/p[i];
141  }
142 }
143
144 /*****************************************************************
145  sometimes we only need the forward or backward pass of the
146  backsubstitution, hence we provide these two routines separately
147  ***************************************************************/
148
150  float64_t a[], int32_t n, float64_t p[], float64_t b[], float64_t x[])
151 {
152  int32_t i, k;
153  float64_t sum;
154
155  for (i=0; i<n; i++) {
156  sum=b[i];
157  for (k=i-1; k>=0; k--) sum -= a[n*i + k]*x[k];
158  x[i]=sum/p[i];
159  }
160 }
161
163  float64_t a[], int32_t n, float64_t p[], float64_t b[], float64_t x[])
164 {
165  int32_t i, k;
166  float64_t sum;
167
168  for (i=n-1; i>=0; i--) {
169  sum=b[i];
170  for (k=i+1; k<n; k++) sum -= a[n*k + i]*x[k];
171  x[i]=sum/p[i];
172  }
173 }
174
175 /*****************************************************************
176  solves the system | -H_x A' | |x_x| = |c_x|
177  | A H_y| |x_y| |c_y|
178
179  with H_x (and H_y) positive (semidefinite) matrices
180  and n, m the respective sizes of H_x and H_y
181
182  for variables see pg. 48 of notebook or do the calculations on a
183  sheet of paper again
184
185  predictor solves the whole thing, corrector assues that H_x didn't
186  change and relies on the results of the predictor. therefore do
187  _not_ modify workspace
188
189  if you want to speed tune anything in the code here's the right
190  place to do so: about 95% of the time is being spent in
191  here. something like an iterative refinement would be nice,
192  especially when switching from float64_t to single precision. if you
193  have a fast parallel cholesky use it instead of the numrec
194  implementations.
195
196  side effects: changes H_y (but this is just the unit matrix or zero anyway
197  in our case)
198  ***************************************************************/
199
201  int32_t n, int32_t m, float64_t h_x[], float64_t h_y[], float64_t a[],
202  float64_t x_x[], float64_t x_y[], float64_t c_x[], float64_t c_y[],
203  float64_t workspace[], int32_t step)
204 {
205  int32_t i,j,k;
206
207  float64_t *p_x;
208  float64_t *p_y;
209  float64_t *t_a;
210  float64_t *t_c;
211  float64_t *t_y;
212
213  p_x = workspace; /* together n + m + n*m + n + m = n*(m+2)+2*m */
214  p_y = p_x + n;
215  t_a = p_y + m;
216  t_c = t_a + n*m;
217  t_y = t_c + n;
218
219  if (step == PREDICTOR) {
220  if (!choldc(h_x, n, p_x)) /* do cholesky decomposition */
221  return false;
222
223  for (i=0; i<m; i++) /* forward pass for A' */
224  chol_forward(h_x, n, p_x, a+i*n, t_a+i*n);
225
226  for (i=0; i<m; i++) /* compute (h_y + a h_x^-1A') */
227  for (j=i; j<m; j++)
228  for (k=0; k<n; k++)
229  h_y[m*i + j] += t_a[n*j + k] * t_a[n*i + k];
230
231  choldc(h_y, m, p_y); /* and cholesky decomposition */
232  }
233
234  chol_forward(h_x, n, p_x, c_x, t_c);
235  /* forward pass for c */
236
237  for (i=0; i<m; i++) { /* and solve for x_y */
238  t_y[i] = c_y[i];
239  for (j=0; j<n; j++)
240  t_y[i] += t_a[i*n + j] * t_c[j];
241  }
242
243  cholsb(h_y, m, p_y, t_y, x_y);
244
245  for (i=0; i<n; i++) { /* finally solve for x_x */
246  t_c[i] = -t_c[i];
247  for (j=0; j<m; j++)
248  t_c[i] += t_a[j*n + i] * x_y[j];
249  }
250
251  chol_backward(h_x, n, p_x, t_c, x_x);
252  return true;
253 }
254
255 /*****************************************************************
256  matrix vector multiplication (symmetric matrix but only one triangle
257  given). computes m*x = y
258  no need to tune it as it's only of O(n^2) but cholesky is of
259  O(n^3). so don't waste your time _here_ although it isn't very
260  elegant.
261  ***************************************************************/
262
263 void matrix_vector(int32_t n, float64_t m[], float64_t x[], float64_t y[])
264 {
265  int32_t i, j;
266
267  for (i=0; i<n; i++) {
268  y[i] = m[(n+1) * i] * x[i];
269
270  for (j=0; j<i; j++)
271  y[i] += m[i + n*j] * x[j];
272
273  for (j=i+1; j<n; j++)
274  y[i] += m[n*i + j] * x[j];
275  }
276 }
277
278 /*****************************************************************
279  call only this routine; this is the only one you're interested in
281
282  the restart feature exists but it may not be of much use due to the
283  fact that an initial setting, although close but not very close the
284  the actual solution will result in very good starting diagnostics
285  (primal and dual feasibility and small infeasibility gap) but incur
286  later stalling of the optimizer afterwards as we have to enforce
287  positivity of the slacks.
288  ***************************************************************/
289
290 int32_t pr_loqo(
291  int32_t n, int32_t m, float64_t c[], float64_t h_x[], float64_t a[],
292  float64_t b[], float64_t l[], float64_t u[], float64_t primal[],
293  float64_t dual[], int32_t verb, float64_t sigfig_max, int32_t counter_max,
294  float64_t margin, float64_t bound, int32_t restart)
295 {
296  /* the knobs to be tuned ... */
297  /* float64_t margin = -0.95; we will go up to 95% of the
298  distance between old variables and zero */
299  /* float64_t bound = 10; preset value for the start. small
300  values give good initial
301  feasibility but may result in slow
302  convergence afterwards: we're too
303  close to zero */
304  /* to be allocated */
305  float64_t *workspace;
306  float64_t *diag_h_x;
307  float64_t *h_y;
308  float64_t *c_x;
309  float64_t *c_y;
310  float64_t *h_dot_x;
311  float64_t *rho;
312  float64_t *nu;
313  float64_t *tau;
314  float64_t *sigma;
315  float64_t *gamma_z;
316  float64_t *gamma_s;
317
318  float64_t *hat_nu;
319  float64_t *hat_tau;
320
321  float64_t *delta_x;
322  float64_t *delta_y;
323  float64_t *delta_s;
324  float64_t *delta_z;
325  float64_t *delta_g;
326  float64_t *delta_t;
327
328  float64_t *d;
329
330  /* from the header - pointers into primal and dual */
331  float64_t *x;
332  float64_t *y;
333  float64_t *g;
334  float64_t *z;
335  float64_t *s;
336  float64_t *t;
337
338  /* auxiliary variables */
339  float64_t b_plus_1;
340  float64_t c_plus_1;
341
342  float64_t x_h_x;
343  float64_t primal_inf;
344  float64_t dual_inf;
345
346  float64_t sigfig;
347  float64_t primal_obj, dual_obj;
348  float64_t mu;
349  float64_t alfa=-1;
350  int32_t counter = 0;
351
352  int32_t status = STILL_RUNNING;
353
354  int32_t i,j;
355
356  /* memory allocation */
357  workspace = SG_MALLOC(float64_t, (n*(m+2)+2*m));
358  diag_h_x = SG_MALLOC(float64_t, n);
359  h_y = SG_MALLOC(float64_t, m*m);
360  c_x = SG_MALLOC(float64_t, n);
361  c_y = SG_MALLOC(float64_t, m);
362  h_dot_x = SG_MALLOC(float64_t, n);
363
364  rho = SG_MALLOC(float64_t, m);
365  nu = SG_MALLOC(float64_t, n);
366  tau = SG_MALLOC(float64_t, n);
367  sigma = SG_MALLOC(float64_t, n);
368
369  gamma_z = SG_MALLOC(float64_t, n);
370  gamma_s = SG_MALLOC(float64_t, n);
371
372  hat_nu = SG_MALLOC(float64_t, n);
373  hat_tau = SG_MALLOC(float64_t, n);
374
375  delta_x = SG_MALLOC(float64_t, n);
376  delta_y = SG_MALLOC(float64_t, m);
377  delta_s = SG_MALLOC(float64_t, n);
378  delta_z = SG_MALLOC(float64_t, n);
379  delta_g = SG_MALLOC(float64_t, n);
380  delta_t = SG_MALLOC(float64_t, n);
381
382  d = SG_MALLOC(float64_t, n);
383
384  /* pointers into the external variables */
385  x = primal; /* n */
386  g = x + n; /* n */
387  t = g + n; /* n */
388
389  y = dual; /* m */
390  z = y + m; /* n */
391  s = z + n; /* n */
392
393  /* initial settings */
394  b_plus_1 = 1;
395  c_plus_1 = 0;
396  for (i=0; i<n; i++) c_plus_1 += c[i];
397
398  /* get diagonal terms */
399  for (i=0; i<n; i++) diag_h_x[i] = h_x[(n+1)*i];
400
401  /* starting point */
402  if (restart == 1) {
403  /* x, y already preset */
404  for (i=0; i<n; i++) { /* compute g, t for primal feasibility */
405  g[i] = CMath::max(CMath::abs(x[i] - l[i]), bound);
406  t[i] = CMath::max(CMath::abs(u[i] - x[i]), bound);
407  }
408
409  matrix_vector(n, h_x, x, h_dot_x); /* h_dot_x = h_x * x */
410
411  for (i=0; i<n; i++) { /* sigma is a dummy variable to calculate z, s */
412  sigma[i] = c[i] + h_dot_x[i];
413  for (j=0; j<m; j++)
414  sigma[i] -= a[n*j + i] * y[j];
415
416  if (sigma[i] > 0) {
417  s[i] = bound;
418  z[i] = sigma[i] + bound;
419  }
420  else {
421  s[i] = bound - sigma[i];
422  z[i] = bound;
423  }
424  }
425  }
426  else { /* use default start settings */
427  for (i=0; i<m; i++)
428  for (j=i; j<m; j++)
429  h_y[i*m + j] = (i==j) ? 1 : 0;
430
431  for (i=0; i<n; i++) {
432  c_x[i] = c[i];
433  h_x[(n+1)*i] += 1;
434  }
435
436  for (i=0; i<m; i++)
437  c_y[i] = b[i];
438
439  /* and solve the system [-H_x A'; A H_y] [x, y] = [c_x; c_y] */
440  solve_reduced(n, m, h_x, h_y, a, x, y, c_x, c_y, workspace,
441  PREDICTOR);
442
443  /* initialize the other variables */
444  for (i=0; i<n; i++) {
445  g[i] = CMath::max(CMath::abs(x[i] - l[i]), bound);
446  z[i] = CMath::max(CMath::abs(x[i]), bound);
447  t[i] = CMath::max(CMath::abs(u[i] - x[i]), bound);
448  s[i] = CMath::max(CMath::abs(x[i]), bound);
449  }
450  }
451
452  for (i=0, mu=0; i<n; i++)
453  mu += z[i] * g[i] + s[i] * t[i];
454  mu = mu / (2*n);
455
456  /* the main loop */
457  if (verb >= STATUS) {
458  SG_SDEBUG("counter | pri_inf | dual_inf | pri_obj | dual_obj | ");
459  SG_SDEBUG("sigfig | alpha | nu \n");
460  SG_SDEBUG("-------------------------------------------------------");
461  SG_SDEBUG("---------------------------\n");
462  }
463
464  while (status == STILL_RUNNING) {
465  /* predictor */
466
467  /* put back original diagonal values */
468  for (i=0; i<n; i++)
469  h_x[(n+1) * i] = diag_h_x[i];
470
471  matrix_vector(n, h_x, x, h_dot_x); /* compute h_dot_x = h_x * x */
472
473  for (i=0; i<m; i++) {
474  rho[i] = b[i];
475  for (j=0; j<n; j++)
476  rho[i] -= a[n*i + j] * x[j];
477  }
478
479  for (i=0; i<n; i++) {
480  nu[i] = l[i] - x[i] + g[i];
481  tau[i] = u[i] - x[i] - t[i];
482
483  sigma[i] = c[i] - z[i] + s[i] + h_dot_x[i];
484  for (j=0; j<m; j++)
485  sigma[i] -= a[n*j + i] * y[j];
486
487  gamma_z[i] = - z[i];
488  gamma_s[i] = - s[i];
489  }
490
491  /* instrumentation */
492  x_h_x = 0;
493  primal_inf = 0;
494  dual_inf = 0;
495
496  for (i=0; i<n; i++) {
497  x_h_x += h_dot_x[i] * x[i];
498  primal_inf += CMath::sq(tau[i]);
499  primal_inf += CMath::sq(nu[i]);
500  dual_inf += CMath::sq(sigma[i]);
501  }
502  for (i=0; i<m; i++)
503  primal_inf += CMath::sq(rho[i]);
504  primal_inf = sqrt(primal_inf)/b_plus_1;
505  dual_inf = sqrt(dual_inf)/c_plus_1;
506
507  primal_obj = 0.5 * x_h_x;
508  dual_obj = -0.5 * x_h_x;
509  for (i=0; i<n; i++) {
510  primal_obj += c[i] * x[i];
511  dual_obj += l[i] * z[i] - u[i] * s[i];
512  }
513  for (i=0; i<m; i++)
514  dual_obj += b[i] * y[i];
515
516  sigfig = log10(CMath::abs(primal_obj) + 1) -
517  log10(CMath::abs(primal_obj - dual_obj));
518  sigfig = CMath::max(sigfig, 0.0);
519
520  /* the diagnostics - after we computed our results we will
521  analyze them */
522
523  if (counter > counter_max) status = ITERATION_LIMIT;
524  if (sigfig > sigfig_max) status = OPTIMAL_SOLUTION;
525  if (primal_inf > 10e100) status = PRIMAL_INFEASIBLE;
526  if (dual_inf > 10e100) status = DUAL_INFEASIBLE;
527  if ((primal_inf > 10e100) & (dual_inf > 10e100)) status = PRIMAL_AND_DUAL_INFEASIBLE;
528  if (CMath::abs(primal_obj) > 10e100) status = PRIMAL_UNBOUNDED;
529  if (CMath::abs(dual_obj) > 10e100) status = DUAL_UNBOUNDED;
530
531  /* write some nice routine to enforce the time limit if you
532  _really_ want, however it's quite useless as you can compute
533  the time from the maximum number of iterations as every
534  iteration costs one cholesky decomposition plus a couple of
535  backsubstitutions */
536
537  /* generate report */
538  if ((verb >= FLOOD) | ((verb == STATUS) & (status != 0)))
539  SG_SDEBUG("%7i | %.2e | %.2e | % .2e | % .2e | %6.3f | %.4f | %.2e\n",
540  counter, primal_inf, dual_inf, primal_obj, dual_obj,
541  sigfig, alfa, mu);
542
543  counter++;
544
545  if (status == 0) { /* we may keep on going, otherwise
546  it'll cost one loop extra plus a
547  messed up main diagonal of h_x */
548  /* intermediate variables (the ones with hat) */
549  for (i=0; i<n; i++) {
550  hat_nu[i] = nu[i] + g[i] * gamma_z[i] / z[i];
551  hat_tau[i] = tau[i] - t[i] * gamma_s[i] / s[i];
552  /* diagonal terms */
553  d[i] = z[i] / g[i] + s[i] / t[i];
554  }
555
556  /* initialization before the cholesky solver */
557  for (i=0; i<n; i++) {
558  h_x[(n+1)*i] = diag_h_x[i] + d[i];
559  c_x[i] = sigma[i] - z[i] * hat_nu[i] / g[i] -
560  s[i] * hat_tau[i] / t[i];
561  }
562  for (i=0; i<m; i++) {
563  c_y[i] = rho[i];
564  for (j=i; j<m; j++)
565  h_y[m*i + j] = 0;
566  }
567
568  /* and do it */
569  if (!solve_reduced(n, m, h_x, h_y, a, delta_x, delta_y, c_x, c_y, workspace, PREDICTOR))
570  {
571  status=INCONSISTENT;
572  goto exit_optimizer;
573  }
574
575  for (i=0; i<n; i++) {
576  /* backsubstitution */
577  delta_s[i] = s[i] * (delta_x[i] - hat_tau[i]) / t[i];
578  delta_z[i] = z[i] * (hat_nu[i] - delta_x[i]) / g[i];
579
580  delta_g[i] = g[i] * (gamma_z[i] - delta_z[i]) / z[i];
581  delta_t[i] = t[i] * (gamma_s[i] - delta_s[i]) / s[i];
582
583  /* central path (corrector) */
584  gamma_z[i] = mu / g[i] - z[i] - delta_z[i] * delta_g[i] / g[i];
585  gamma_s[i] = mu / t[i] - s[i] - delta_s[i] * delta_t[i] / t[i];
586
587  /* (some more intermediate variables) the hat variables */
588  hat_nu[i] = nu[i] + g[i] * gamma_z[i] / z[i];
589  hat_tau[i] = tau[i] - t[i] * gamma_s[i] / s[i];
590
591  /* initialization before the cholesky */
592  c_x[i] = sigma[i] - z[i] * hat_nu[i] / g[i] - s[i] * hat_tau[i] / t[i];
593  }
594
595  for (i=0; i<m; i++) { /* comput c_y and rho */
596  c_y[i] = rho[i];
597  for (j=i; j<m; j++)
598  h_y[m*i + j] = 0;
599  }
600
601  /* and do it */
602  solve_reduced(n, m, h_x, h_y, a, delta_x, delta_y, c_x, c_y, workspace,
603  CORRECTOR);
604
605  for (i=0; i<n; i++) {
606  /* backsubstitution */
607  delta_s[i] = s[i] * (delta_x[i] - hat_tau[i]) / t[i];
608  delta_z[i] = z[i] * (hat_nu[i] - delta_x[i]) / g[i];
609
610  delta_g[i] = g[i] * (gamma_z[i] - delta_z[i]) / z[i];
611  delta_t[i] = t[i] * (gamma_s[i] - delta_s[i]) / s[i];
612  }
613
614  alfa = -1;
615  for (i=0; i<n; i++) {
616  alfa = CMath::min(alfa, delta_g[i]/g[i]);
617  alfa = CMath::min(alfa, delta_t[i]/t[i]);
618  alfa = CMath::min(alfa, delta_s[i]/s[i]);
619  alfa = CMath::min(alfa, delta_z[i]/z[i]);
620  }
621  alfa = (margin - 1) / alfa;
622
623  /* compute mu */
624  for (i=0, mu=0; i<n; i++)
625  mu += z[i] * g[i] + s[i] * t[i];
626  mu = mu / (2*n);
627  mu = mu * CMath::sq((alfa - 1) / (alfa + 10));
628
629  for (i=0; i<n; i++) {
630  x[i] += alfa * delta_x[i];
631  g[i] += alfa * delta_g[i];
632  t[i] += alfa * delta_t[i];
633  z[i] += alfa * delta_z[i];
634  s[i] += alfa * delta_s[i];
635  }
636
637  for (i=0; i<m; i++)
638  y[i] += alfa * delta_y[i];
639  }
640  }
641
642 exit_optimizer:
643  if ((status == 1) && (verb >= STATUS)) {
644  SG_SDEBUG("----------------------------------------------------------------------------------\n");
645  SG_SDEBUG("optimization converged\n");
646  }
647
648  /* free memory */
649  SG_FREE(workspace);
650  SG_FREE(diag_h_x);
651  SG_FREE(h_y);
652  SG_FREE(c_x);
653  SG_FREE(c_y);
654  SG_FREE(h_dot_x);
655
656  SG_FREE(rho);
657  SG_FREE(nu);
658  SG_FREE(tau);
659  SG_FREE(sigma);
660  SG_FREE(gamma_z);
661  SG_FREE(gamma_s);
662
663  SG_FREE(hat_nu);
664  SG_FREE(hat_tau);
665
666  SG_FREE(delta_x);
667  SG_FREE(delta_y);
668  SG_FREE(delta_s);
669  SG_FREE(delta_z);
670  SG_FREE(delta_g);
671  SG_FREE(delta_t);
672
673  SG_FREE(d);
674