SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
LaRank.cpp
Go to the documentation of this file.
1 // -*- C++ -*-
2 // Main functions of the LaRank algorithm for soving Multiclass SVM
3 // Copyright (C) 2008- Antoine Bordes
4 // Shogun specific adjustments (w) 2009 Soeren Sonnenburg
5 
6 // This library is free software; you can redistribute it and/or
7 // modify it under the terms of the GNU Lesser General Public
8 // License as published by the Free Software Foundation; either
9 // version 2.1 of the License, or (at your option) any later version.
10 //
11 // This program is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // aint64_t with this program; if not, write to the Free Software
18 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
19 //
20 /***********************************************************************
21  *
22  * LUSH Lisp Universal Shell
23  * Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
24  * Includes parts of TL3:
25  * Copyright (C) 1987-1999 Leon Bottou and Neuristique.
26  * Includes selected parts of SN3.2:
27  * Copyright (C) 1991-2001 AT&T Corp.
28  *
29  * This program is free software; you can redistribute it and/or modify
30  * it under the terms of the GNU General Public License as published by
31  * the Free Software Foundation; either version 2 of the License, or
32  * (at your option) any later version.
33  *
34  * This program is distributed in the hope that it will be useful,
35  * but WITHOUT ANY WARRANTY; without even the implied warranty of
36  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37  * GNU General Public License for more details.
38  *
39  * You should have received a copy of the GNU General Public License
40  * aint64_t with this program; if not, write to the Free Software
41  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
42  *
43  ***********************************************************************/
44 
45 /***********************************************************************
46  * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
47  **********************************************************************/
48 
49 #include <vector>
50 #include <algorithm>
51 
52 #include <shogun/io/SGIO.h>
53 #include <shogun/lib/Signal.h>
54 #include <shogun/lib/Time.h>
58 #include <shogun/kernel/Kernel.h>
60 
61 using namespace shogun;
62 
63 namespace shogun
64 {
65  static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
66  {
67  larank_kcache_t *self;
68  self = SG_CALLOC (larank_kcache_t, 1);
69  self->l = 0;
70  self->maxrowlen = 0;
71  self->func = kernelfunc;
72  self->prevbuddy = self;
73  self->nextbuddy = self;
74  self->cursize = sizeof (larank_kcache_t);
75  self->maxsize = 256 * 1024 * 1024;
76  self->qprev = SG_MALLOC(int32_t, 1);
77  self->qnext = SG_MALLOC(int32_t, 1);
78  self->rnext = self->qnext + 1;
79  self->rprev = self->qprev + 1;
80  self->rprev[-1] = -1;
81  self->rnext[-1] = -1;
82  return self;
83  }
84 
85  static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
86  {
87  int32_t olen = self->rsize[k];
88  if (nlen < olen)
89  {
90  float32_t *ndata;
91  float32_t *odata = self->rdata[k];
92  if (nlen > 0)
93  {
94  ndata = SG_MALLOC(float32_t, nlen);
95  memcpy (ndata, odata, nlen * sizeof (float32_t));
96  }
97  else
98  {
99  ndata = 0;
100  self->rnext[self->rprev[k]] = self->rnext[k];
101  self->rprev[self->rnext[k]] = self->rprev[k];
102  self->rnext[k] = self->rprev[k] = k;
103  }
104  SG_FREE (odata);
105  self->rdata[k] = ndata;
106  self->rsize[k] = nlen;
107  self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
108  }
109  }
110 
111  static void xpurge (larank_kcache_t * self)
112  {
113  if (self->cursize > self->maxsize)
114  {
115  int32_t k = self->rprev[-1];
116  while (self->cursize > self->maxsize && k != self->rnext[-1])
117  {
118  int32_t pk = self->rprev[k];
119  xtruncate (self, k, 0);
120  k = pk;
121  }
122  }
123  }
124 
125  static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
126  {
127  ASSERT (self);
128  ASSERT (entries > 0);
129  self->maxsize = entries;
130  xpurge (self);
131  }
132 
133  static void larank_kcache_destroy (larank_kcache_t * self)
134  {
135  if (self)
136  {
137  int32_t i;
138  larank_kcache_t *nb = self->nextbuddy;
139  larank_kcache_t *pb = self->prevbuddy;
140  pb->nextbuddy = nb;
141  nb->prevbuddy = pb;
142  /* delete */
143  if (self->i2r)
144  SG_FREE (self->i2r);
145  if (self->r2i)
146  SG_FREE (self->r2i);
147  if (self->rdata)
148  for (i = 0; i < self->l; i++)
149  if (self->rdata[i])
150  SG_FREE (self->rdata[i]);
151  if (self->rdata)
152  SG_FREE (self->rdata);
153  if (self->rsize)
154  SG_FREE (self->rsize);
155  if (self->rdiag)
156  SG_FREE (self->rdiag);
157  if (self->qnext)
158  SG_FREE (self->qnext);
159  if (self->qprev)
160  SG_FREE (self->qprev);
161  memset (self, 0, sizeof (larank_kcache_t));
162  SG_FREE (self);
163  }
164  }
165 
166  static void xminsize (larank_kcache_t * self, int32_t n)
167  {
168  int32_t ol = self->l;
169  if (n > ol)
170  {
171  int32_t i;
172  int32_t nl = CMath::max (256, ol);
173  while (nl < n)
174  nl = nl + nl;
175  self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
176  self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
177  self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
178  self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
179  self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
180  self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
181  self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
182  self->rnext = self->qnext + 1;
183  self->rprev = self->qprev + 1;
184  for (i = ol; i < nl; i++)
185  {
186  self->i2r[i] = i;
187  self->r2i[i] = i;
188  self->rsize[i] = -1;
189  self->rnext[i] = i;
190  self->rprev[i] = i;
191  self->rdata[i] = 0;
192  }
193  self->l = nl;
194  }
195  }
196 
197  static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
198  {
199  xminsize (self, n);
200  return self->r2i;
201  }
202 
203  static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
204  {
205  int32_t olen = self->rsize[k];
206  if (nlen > olen)
207  {
208  float32_t *ndata = SG_MALLOC(float32_t, nlen);
209  if (olen > 0)
210  {
211  float32_t *odata = self->rdata[k];
212  memcpy (ndata, odata, olen * sizeof (float32_t));
213  SG_FREE (odata);
214  }
215  self->rdata[k] = ndata;
216  self->rsize[k] = nlen;
217  self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
218  self->maxrowlen = CMath::max (self->maxrowlen, nlen);
219  }
220  }
221 
222  static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
223  {
224  /* swap row data */
225  if (r1 < self->maxrowlen || r2 < self->maxrowlen)
226  {
227  int32_t mrl = 0;
228  int32_t k = self->rnext[-1];
229  while (k >= 0)
230  {
231  int32_t nk = self->rnext[k];
232  int32_t n = self->rsize[k];
233  int32_t rr = self->i2r[k];
234  float32_t *d = self->rdata[k];
235  if (r1 < n)
236  {
237  if (r2 < n)
238  {
239  float32_t t1 = d[r1];
240  float32_t t2 = d[r2];
241  d[r1] = t2;
242  d[r2] = t1;
243  }
244  else if (rr == r2)
245  {
246  d[r1] = self->rdiag[k];
247  }
248  else
249  {
250  int32_t arsize = self->rsize[i2];
251  if (rr < arsize && rr != r1)
252  d[r1] = self->rdata[i2][rr];
253  else
254  xtruncate (self, k, r1);
255  }
256  }
257  else if (r2 < n)
258  {
259  if (rr == r1)
260  {
261  d[r2] = self->rdiag[k];
262  }
263  else
264  {
265  int32_t arsize = self->rsize[i1];
266  if (rr < arsize && rr != r2)
267  d[r2] = self->rdata[i1][rr];
268  else
269  xtruncate (self, k, r2);
270  }
271  }
272  mrl = CMath::max (mrl, self->rsize[k]);
273  k = nk;
274  }
275  self->maxrowlen = mrl;
276  }
277  /* swap r2i and i2r */
278  self->r2i[r1] = i2;
279  self->r2i[r2] = i1;
280  self->i2r[i1] = r2;
281  self->i2r[i2] = r1;
282  }
283 
284  static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
285  {
286  xminsize (self, 1 + CMath::max(r1, r2));
287  xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
288  }
289 
290  static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
291  {
292  xminsize (self, 1 + CMath::max (r1, i2));
293  xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
294  }
295 
296  static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
297  {
298  /* search buddies */
299  larank_kcache_t *cache = self->nextbuddy;
300  do
301  {
302  int32_t l = cache->l;
303  if (i < l && j < l)
304  {
305  int32_t s = cache->rsize[i];
306  int32_t p = cache->i2r[j];
307  if (p < s)
308  return cache->rdata[i][p];
309  if (i == j && s >= 0)
310  return cache->rdiag[i];
311  p = cache->i2r[i];
312  s = cache->rsize[j];
313  if (p < s)
314  return cache->rdata[j][p];
315  }
316  cache = cache->nextbuddy;
317  }
318  while (cache != self);
319  /* compute */
320  return self->func->kernel(i, j);
321  }
322 
323 
324  static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
325  {
326  ASSERT (self);
327  ASSERT (i >= 0);
328  ASSERT (j >= 0);
329  return xquery (self, i, j);
330  }
331 
332 
333  static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
334  {
335  larank_kcache_t *p = self;
336  larank_kcache_t *selflast = self->prevbuddy;
337  larank_kcache_t *buddylast = buddy->prevbuddy;
338  /* check functions are identical */
339  ASSERT (self->func == buddy->func);
340  /* make sure we are not already buddies */
341  do
342  {
343  if (p == buddy)
344  return;
345  p = p->nextbuddy;
346  }
347  while (p != self);
348  /* link */
349  selflast->nextbuddy = buddy;
350  buddy->prevbuddy = selflast;
351  buddylast->nextbuddy = self;
352  self->prevbuddy = buddylast;
353  }
354 
355 
356  static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
357  {
358  ASSERT (i >= 0);
359  if (i < self->l && len <= self->rsize[i])
360  {
361  self->rnext[self->rprev[i]] = self->rnext[i];
362  self->rprev[self->rnext[i]] = self->rprev[i];
363  }
364  else
365  {
366  int32_t olen, p;
367  float32_t *d;
368  if (i >= self->l || len >= self->l)
369  xminsize (self, CMath::max (1 + i, len));
370  olen = self->rsize[i];
371  if (olen < len)
372  {
373  if (olen < 0)
374  {
375  self->rdiag[i] = self->func->kernel(i, i);
376  olen = self->rsize[i] = 0;
377  }
378  xextend (self, i, len);
379  d = self->rdata[i];
380  self->rsize[i] = olen;
381  for (p = olen; p < len; p++)
382  d[p] = larank_kcache_query (self, self->r2i[p], i);
383  self->rsize[i] = len;
384  }
385  self->rnext[self->rprev[i]] = self->rnext[i];
386  self->rprev[self->rnext[i]] = self->rprev[i];
387  xpurge (self);
388  }
389  self->rprev[i] = -1;
390  self->rnext[i] = self->rnext[-1];
391  self->rnext[self->rprev[i]] = i;
392  self->rprev[self->rnext[i]] = i;
393  return self->rdata[i];
394  }
395 
396 }
397 
398 
399 // Initializing an output class (basically creating a kernel cache for it)
400 void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
401 {
402  kernel = larank_kcache_create (kfunc);
403  larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
404  beta = SG_MALLOC(float32_t, 1);
405  g = SG_MALLOC(float32_t, 1);
406  *beta=0;
407  *g=0;
408  l = 0;
409 }
410 
411 // Destroying an output class (basically destroying the kernel cache)
412 void LaRankOutput::destroy ()
413 {
414  larank_kcache_destroy (kernel);
415  kernel=NULL;
416  SG_FREE(beta);
417  SG_FREE(g);
418  beta=NULL;
419  g=NULL;
420 }
421 
422 // !Important! Computing the score of a given input vector for the actual output
423 float64_t LaRankOutput::computeScore (int32_t x_id)
424 {
425  if (l == 0)
426  return 0;
427  else
428  {
429  float32_t *row = larank_kcache_query_row (kernel, x_id, l);
430  return SGVector<float32_t>::dot (beta, row, l);
431  }
432 }
433 
434 // !Important! Computing the gradient of a given input vector for the actual output
435 float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
436 {
437  return (yi == ythis ? 1 : 0) - computeScore (xi_id);
438 }
439 
440 // Updating the solution in the actual output
441 void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
442 {
443  int32_t *r2i = larank_kcache_r2i (kernel, l);
444  int64_t xr = l + 1;
445  for (int32_t r = 0; r < l; r++)
446  if (r2i[r] == x_id)
447  {
448  xr = r;
449  break;
450  }
451 
452  // updates the cache order and the beta coefficient
453  if (xr < l)
454  {
455  beta[xr]+=lambda;
456  }
457  else
458  {
459  larank_kcache_swap_ri (kernel, l, x_id);
460  SGVector<float32_t>::resize(g, l, l+1);
461  SGVector<float32_t>::resize(beta, l, l+1);
462  g[l]=gp;
463  beta[l]=lambda;
464  l++;
465  }
466 
467  // update stored gradients
468  float32_t *row = larank_kcache_query_row (kernel, x_id, l);
469  for (int32_t r = 0; r < l; r++)
470  {
471  float64_t oldg = g[r];
472  g[r]=oldg - lambda * row[r];
473  }
474 }
475 
476 // Linking the cahe of this output to the cache of an other "buddy" output
477 // so that if a requested value is not found in this cache, you can ask your buddy if it has it.
478 void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
479 {
480  larank_kcache_set_buddy (bud, kernel);
481 }
482 
483 // Removing useless support vectors (for which beta=0)
484 int32_t LaRankOutput::cleanup ()
485 {
486  int32_t count = 0;
487  std::vector < int32_t >idx;
488  for (int32_t x = 0; x < l; x++)
489  {
490  if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
491  {
492  idx.push_back (x);
493  count++;
494  }
495  }
496  int32_t new_l = l - count;
497  for (int32_t xx = 0; xx < count; xx++)
498  {
499  int32_t i = idx[xx] - xx;
500  for (int32_t r = i; r < (l - 1); r++)
501  {
502  larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
503  beta[r]=beta[r + 1];
504  g[r]=g[r + 1];
505  }
506  }
507  SGVector<float32_t>::resize(beta, l, new_l+1);
508  SGVector<float32_t>::resize(g, l, new_l+1);
509  beta[new_l]=0;
510  g[new_l]=0;
511  l = new_l;
512  return count;
513 }
514 
515 // --- Below are information or "get" functions --- //
516 //
517 float64_t LaRankOutput::getW2 ()
518 {
519  float64_t sum = 0;
520  int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
521  for (int32_t r = 0; r < l; r++)
522  {
523  float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
524  sum += beta[r] * SGVector<float32_t>::dot (beta, row_r, l);
525  }
526  return sum;
527 }
528 
529 float64_t LaRankOutput::getKii (int32_t x_id)
530 {
531  return larank_kcache_query (kernel, x_id, x_id);
532 }
533 
534 //
535 float64_t LaRankOutput::getBeta (int32_t x_id)
536 {
537  int32_t *r2i = larank_kcache_r2i (kernel, l);
538  int32_t xr = -1;
539  for (int32_t r = 0; r < l; r++)
540  if (r2i[r] == x_id)
541  {
542  xr = r;
543  break;
544  }
545  return (xr < 0 ? 0 : beta[xr]);
546 }
547 
548 //
549 float64_t LaRankOutput::getGradient (int32_t x_id)
550 {
551  int32_t *r2i = larank_kcache_r2i (kernel, l);
552  int32_t xr = -1;
553  for (int32_t r = 0; r < l; r++)
554  if (r2i[r] == x_id)
555  {
556  xr = r;
557  break;
558  }
559  return (xr < 0 ? 0 : g[xr]);
560 }
561 bool LaRankOutput::isSupportVector (int32_t x_id) const
562 {
563  int32_t *r2i = larank_kcache_r2i (kernel, l);
564  int32_t xr = -1;
565  for (int32_t r = 0; r < l; r++)
566  if (r2i[r] == x_id)
567  {
568  xr = r;
569  break;
570  }
571  return (xr >= 0);
572 }
573 
574 //
575 int32_t LaRankOutput::getSV (float32_t* &sv) const
576 {
577  sv=SG_MALLOC(float32_t, l);
578  int32_t *r2i = larank_kcache_r2i (kernel, l);
579  for (int32_t r = 0; r < l; r++)
580  sv[r]=r2i[r];
581  return l;
582 }
583 
585  nb_seen_examples (0), nb_removed (0),
586  n_pro (0), n_rep (0), n_opt (0),
587  w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
588  batch_mode(true), step(0)
589 {
590 }
591 
594  nb_seen_examples (0), nb_removed (0),
595  n_pro (0), n_rep (0), n_opt (0),
596  w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
597  batch_mode(true), step(0)
598 {
599 }
600 
602 {
603  destroy();
604 }
605 
607 {
608  tau = 0.0001;
609 
610  ASSERT(m_kernel);
613 
615 
616  if (data)
617  {
618  if (data->get_num_vectors() != m_labels->get_num_labels())
619  {
620  SG_ERROR("Numbert of vectors (%d) does not match number of labels (%d)\n",
622  }
623  m_kernel->init(data, data);
624  }
625 
627 
630 
631  int32_t n_it = 1;
632  float64_t gap = DBL_MAX;
633 
634  SG_INFO("Training on %d examples\n", nb_train);
635  while (gap > get_C() && (!CSignal::cancel_computations())) // stopping criteria
636  {
637  float64_t tr_err = 0;
638  int32_t ind = step;
639  for (int32_t i = 0; i < nb_train; i++)
640  {
641  int32_t y=((CMulticlassLabels*) m_labels)->get_label(i);
642  if (add (i, y) != y) // call the add function
643  tr_err++;
644 
645  if (ind && i / ind)
646  {
647  SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
648  (int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
649  ind += step;
650  }
651  }
652 
653  SG_DEBUG("End of iteration %d\n", n_it++);
654  SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
655  gap = computeGap ();
656  SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(get_C()), 6);
657 
658  if (!batch_mode) // skip stopping criteria if online mode
659  gap = 0;
660  }
661  SG_DONE();
662 
663  int32_t num_classes = outputs.size();
664  create_multiclass_svm(num_classes);
665  SG_DEBUG("%d classes\n", num_classes);
666 
667  // Used for saving a model file
668  int32_t i=0;
669  for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
670  {
671  const LaRankOutput* o=&(it->second);
672 
673  larank_kcache_t* k=o->getKernel();
674  int32_t l=o->get_l();
675  float32_t* beta=o->getBetas();
676  int32_t *r2i = larank_kcache_r2i (k, l);
677 
678  ASSERT(l>0);
679  SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);
680 
681  CSVM* svm=new CSVM(l);
682 
683  for (int32_t j=0; j<l; j++)
684  {
685  svm->set_alpha(j, beta[j]);
686  svm->set_support_vector(j, r2i[j]);
687  }
688 
689  svm->set_bias(0);
690  set_svm(i, svm);
691  i++;
692  }
693  destroy();
694 
695  return true;
696 }
697 
698 // LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptative schedule
699 int32_t CLaRank::add (int32_t x_id, int32_t yi)
700 {
701  ++nb_seen_examples;
702  // create a new output object if this one has never been seen before
703  if (!getOutput (yi))
704  {
705  outputs.insert (std::make_pair (yi, LaRankOutput ()));
706  LaRankOutput *cur = getOutput (yi);
707  cur->initialize (m_kernel, cache);
708  if (outputs.size () == 1)
709  y0 = outputs.begin ()->first;
710  // link the cache of this new output to a buddy
711  if (outputs.size () > 1)
712  {
713  LaRankOutput *out0 = getOutput (y0);
714  cur->set_kernel_buddy (out0->getKernel ());
715  }
716  }
717 
718  LaRankPattern tpattern (x_id, yi);
719  LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;
720 
721  // ProcessNew with the "fresh" pattern
722  float64_t time1 = CTime::get_curtime();
723  process_return_t pro_ret = process (pattern, processNew);
724  float64_t dual_increase = pro_ret.dual_increase;
725  float64_t duration = (CTime::get_curtime() - time1);
726  float64_t coeff = dual_increase / (0.00001 + duration);
727  dual += dual_increase;
728  n_pro++;
729  w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;
730 
731  // ProcessOld & Optimize until ready for a new processnew
732  // (Adaptative schedule here)
733  for (;;)
734  {
735  float64_t w_sum = w_pro + w_rep + w_opt;
736  float64_t prop_min = w_sum / 20;
737  if (w_pro < prop_min)
738  w_pro = prop_min;
739  if (w_rep < prop_min)
740  w_rep = prop_min;
741  if (w_opt < prop_min)
742  w_opt = prop_min;
743  w_sum = w_pro + w_rep + w_opt;
744  float64_t r = CMath::random(0.0, w_sum);
745  if (r <= w_pro)
746  {
747  break;
748  }
749  else if ((r > w_pro) && (r <= w_pro + w_rep)) // ProcessOld here
750  {
751  float64_t ltime1 = CTime::get_curtime ();
752  float64_t ldual_increase = reprocess ();
753  float64_t lduration = (CTime::get_curtime () - ltime1);
754  float64_t lcoeff = ldual_increase / (0.00001 + lduration);
755  dual += ldual_increase;
756  n_rep++;
757  w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
758  }
759  else // Optimize here
760  {
761  float64_t ltime1 = CTime::get_curtime ();
762  float64_t ldual_increase = optimize ();
763  float64_t lduration = (CTime::get_curtime () - ltime1);
764  float64_t lcoeff = ldual_increase / (0.00001 + lduration);
765  dual += ldual_increase;
766  n_opt++;
767  w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
768  }
769  }
770  if (nb_seen_examples % 100 == 0) // Cleanup useless Support Vectors/Patterns sometimes
771  nb_removed += cleanup ();
772  return pro_ret.ypred;
773 }
774 
775 // PREDICTION FUNCTION: main function in la_rank_classify
776 int32_t CLaRank::predict (int32_t x_id)
777 {
778  int32_t res = -1;
779  float64_t score_max = -DBL_MAX;
780  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
781  {
782  float64_t score = it->second.computeScore (x_id);
783  if (score > score_max)
784  {
785  score_max = score;
786  res = it->first;
787  }
788  }
789  return res;
790 }
791 
793 {
794  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
795  it->second.destroy ();
796  outputs.clear();
797 }
798 
799 
800 // Compute Duality gap (costly but used in stopping criteria in batch mode)
802 {
803  float64_t sum_sl = 0;
804  float64_t sum_bi = 0;
805  for (uint32_t i = 0; i < patterns.maxcount (); ++i)
806  {
807  const LaRankPattern & p = patterns[i];
808  if (!p.exists ())
809  continue;
810  LaRankOutput *out = getOutput (p.y);
811  if (!out)
812  continue;
813  sum_bi += out->getBeta (p.x_id);
814  float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
815  float64_t gmin = DBL_MAX;
816  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
817  {
818  if (it->first != p.y && it->second.isSupportVector (p.x_id))
819  {
820  float64_t g =
821  it->second.computeGradient (p.x_id, p.y, it->first);
822  if (g < gmin)
823  gmin = g;
824  }
825  }
826  sum_sl += CMath::max (0.0, gi - gmin);
827  }
828  return CMath::max (0.0, computeW2 () + get_C() * sum_sl - sum_bi);
829 }
830 
831 // Nuber of classes so far
832 uint32_t CLaRank::getNumOutputs () const
833 {
834  return outputs.size ();
835 }
836 
837 // Number of Support Vectors
838 int32_t CLaRank::getNSV ()
839 {
840  int32_t res = 0;
841  for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
842  {
843  float32_t* sv=NULL;
844  res += it->second.getSV (sv);
845  SG_FREE(sv);
846  }
847  return res;
848 }
849 
850 // Norm of the parameters vector
852 {
853  float64_t res = 0;
854  for (uint32_t i = 0; i < patterns.maxcount (); ++i)
855  {
856  const LaRankPattern & p = patterns[i];
857  if (!p.exists ())
858  continue;
859  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
860  if (it->second.getBeta (p.x_id))
861  res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
862  }
863  return res;
864 }
865 
866 // Compute Dual objective value
868 {
869  float64_t res = 0;
870  for (uint32_t i = 0; i < patterns.maxcount (); ++i)
871  {
872  const LaRankPattern & p = patterns[i];
873  if (!p.exists ())
874  continue;
875  LaRankOutput *out = getOutput (p.y);
876  if (!out)
877  continue;
878  res += out->getBeta (p.x_id);
879  }
880  return res - computeW2 () / 2;
881 }
882 
883 LaRankOutput *CLaRank::getOutput (int32_t index)
884 {
885  outputhash_t::iterator it = outputs.find (index);
886  return it == outputs.end ()? NULL : &it->second;
887 }
888 
889 // IMPORTANT Main SMO optimization step
890 CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
891 {
892  process_return_t pro_ret = process_return_t (0, 0);
893 
894  /*
895  ** compute gradient and sort
896  */
897  std::vector < outputgradient_t > outputgradients;
898 
899  outputgradients.reserve (getNumOutputs ());
900 
901  std::vector < outputgradient_t > outputscores;
902  outputscores.reserve (getNumOutputs ());
903 
904  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
905  {
906  if (ptype != processOptimize
907  || it->second.isSupportVector (pattern.x_id))
908  {
909  float64_t g =
910  it->second.computeGradient (pattern.x_id, pattern.y, it->first);
911  outputgradients.push_back (outputgradient_t (it->first, g));
912  if (it->first == pattern.y)
913  outputscores.push_back (outputgradient_t (it->first, (1 - g)));
914  else
915  outputscores.push_back (outputgradient_t (it->first, -g));
916  }
917  }
918 
919  std::sort (outputgradients.begin (), outputgradients.end ());
920 
921  /*
922  ** determine the prediction
923  */
924  std::sort (outputscores.begin (), outputscores.end ());
925  pro_ret.ypred = outputscores[0].output;
926 
927  /*
928  ** Find yp (1st part of the pair)
929  */
930  outputgradient_t ygp;
931  LaRankOutput *outp = NULL;
932  uint32_t p;
933  for (p = 0; p < outputgradients.size (); ++p)
934  {
935  outputgradient_t & current = outputgradients[p];
936  LaRankOutput *output = getOutput (current.output);
937  bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
938  bool goodclass = current.output == pattern.y;
939  if ((!support && goodclass) ||
940  (support && output->getBeta (pattern.x_id) < (goodclass ? get_C() : 0)))
941  {
942  ygp = current;
943  outp = output;
944  break;
945  }
946  }
947  if (p == outputgradients.size ())
948  return pro_ret;
949 
950  /*
951  ** Find ym (2nd part of the pair)
952  */
953  outputgradient_t ygm;
954  LaRankOutput *outm = NULL;
955  int32_t m;
956  for (m = outputgradients.size () - 1; m >= 0; --m)
957  {
958  outputgradient_t & current = outputgradients[m];
959  LaRankOutput *output = getOutput (current.output);
960  bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
961  bool goodclass = current.output == pattern.y;
962  if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
963  {
964  ygm = current;
965  outm = output;
966  break;
967  }
968  }
969  if (m < 0)
970  return pro_ret;
971 
972  /*
973  ** Throw or Insert pattern
974  */
975  if ((ygp.gradient - ygm.gradient) < tau)
976  return pro_ret;
977  if (ptype == processNew)
978  patterns.insert (pattern);
979 
980  /*
981  ** compute lambda and clip it
982  */
983  float64_t kii = outp->getKii (pattern.x_id);
984  float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
985  if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
986  {
987  float64_t beta = outp->getBeta (pattern.x_id);
988  if (ygp.output == pattern.y)
989  lambda = CMath::min (lambda, get_C() - beta);
990  else
991  lambda = CMath::min (lambda, fabs (beta));
992  }
993  else
994  lambda = CMath::min (lambda, get_C());
995 
996  /*
997  ** update the solution
998  */
999  outp->update (pattern.x_id, lambda, ygp.gradient);
1000  outm->update (pattern.x_id, -lambda, ygm.gradient);
1001 
1002  pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
1003  return pro_ret;
1004 }
1005 
1006 // ProcessOld
1007 float64_t CLaRank::reprocess ()
1008 {
1009  if (patterns.size ())
1010  {
1011  for (int32_t n = 0; n < 10; ++n)
1012  {
1013  process_return_t pro_ret = process (patterns.sample (), processOld);
1014  if (pro_ret.dual_increase)
1015  return pro_ret.dual_increase;
1016  }
1017  }
1018  return 0;
1019 }
1020 
1021 // Optimize
1022 float64_t CLaRank::optimize ()
1023 {
1024  float64_t dual_increase = 0;
1025  if (patterns.size ())
1026  {
1027  for (int32_t n = 0; n < 10; ++n)
1028  {
1029  process_return_t pro_ret =
1030  process (patterns.sample(), processOptimize);
1031  dual_increase += pro_ret.dual_increase;
1032  }
1033  }
1034  return dual_increase;
1035 }
1036 
1037 // remove patterns and return the number of patterns that were removed
1038 uint32_t CLaRank::cleanup ()
1039 {
1040  /*
1041  for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
1042  it->second.cleanup ();
1043 
1044  uint32_t res = 0;
1045  for (uint32_t i = 0; i < patterns.size (); ++i)
1046  {
1047  LaRankPattern & p = patterns[i];
1048  if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
1049  {
1050  patterns.remove (i);
1051  ++res;
1052  }
1053  }
1054  return res;
1055  */
1056  return 0;
1057 }

SHOGUN Machine Learning Toolbox - Documentation