LaRank.cpp

// -*- C++ -*-
// Main functions of the LaRank algorithm for solving Multiclass SVM
// Copyright (C) 2008- Antoine Bordes
// Shogun specific adjustments (w) 2009 Soeren Sonnenburg

// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
//
/***********************************************************************
 *
 *  LUSH Lisp Universal Shell
 *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
 *  Includes parts of TL3:
 *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.
 *  Includes selected parts of SN3.2:
 *    Copyright (C) 1991-2001 AT&T Corp.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
 *
 ***********************************************************************/

/***********************************************************************
 * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
 **********************************************************************/
#include <vector>
#include <algorithm>

#include "lib/io.h"
#include "lib/Signal.h"
#include "lib/Time.h"
#include "lib/Mathematics.h"
#include "classifier/svm/LaRank.h"
#include "kernel/Kernel.h"

using namespace shogun;

namespace shogun
{
    static void * xmalloc (int32_t n)
    {
        void *p = malloc (n);
        if (!p)
            SG_SERROR ("Function malloc() has returned zero\n");
        return p;
    }

    static void * xrealloc (void *ptr, int32_t n)
    {
        if (!ptr)
            ptr = malloc (n);
        else
            ptr = realloc (ptr, n);
        if (!ptr)
            SG_SERROR ("Function realloc has returned zero\n");
        return ptr;
    }

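    /* Cache bookkeeping (as initialized below and maintained throughout):
       i2r/r2i are mutually inverse permutations between example indices and
       cache row slots; rnext/rprev thread the cached rows into a circular
       LRU list through the sentinel slot -1, which is why they point one
       element into the qnext/qprev allocations; nextbuddy/prevbuddy link
       caches sharing the same kernel into a ring so they can answer each
       other's queries. */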
    static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
    {
        larank_kcache_t *self;
        self = (larank_kcache_t *) xmalloc (sizeof (larank_kcache_t));
        memset (self, 0, sizeof (larank_kcache_t));
        self->l = 0;
        self->maxrowlen = 0;
        self->func = kernelfunc;
        self->prevbuddy = self;
        self->nextbuddy = self;
        self->cursize = sizeof (larank_kcache_t);
        self->maxsize = 256 * 1024 * 1024;
        self->qprev = (int32_t *) xmalloc (sizeof (int32_t));
        self->qnext = (int32_t *) xmalloc (sizeof (int32_t));
        self->rnext = self->qnext + 1;
        self->rprev = self->qprev + 1;
        self->rprev[-1] = -1;
        self->rnext[-1] = -1;
        return self;
    }

    static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
    {
        int32_t olen = self->rsize[k];
        if (nlen < olen)
        {
            float32_t *ndata;
            float32_t *odata = self->rdata[k];
            if (nlen > 0)
            {
                ndata = (float32_t *) xmalloc (nlen * sizeof (float32_t));
                memcpy (ndata, odata, nlen * sizeof (float32_t));
            }
            else
            {
                ndata = 0;
                self->rnext[self->rprev[k]] = self->rnext[k];
                self->rprev[self->rnext[k]] = self->rprev[k];
                self->rnext[k] = self->rprev[k] = k;
            }
            free (odata);
            self->rdata[k] = ndata;
            self->rsize[k] = nlen;
            self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
        }
    }

    static void xpurge (larank_kcache_t * self)
    {
        if (self->cursize > self->maxsize)
        {
            int32_t k = self->rprev[-1];
            while (self->cursize > self->maxsize && k != self->rnext[-1])
            {
                int32_t pk = self->rprev[k];
                xtruncate (self, k, 0);
                k = pk;
            }
        }
    }

    static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
    {
        ASSERT (self);
        ASSERT (entries > 0);
        self->maxsize = entries;
        xpurge (self);
    }

    static void larank_kcache_destroy (larank_kcache_t * self)
    {
        if (self)
        {
            int32_t i;
            larank_kcache_t *nb = self->nextbuddy;
            larank_kcache_t *pb = self->prevbuddy;
            pb->nextbuddy = nb;
            nb->prevbuddy = pb;
            /* delete */
            if (self->i2r)
                free (self->i2r);
            if (self->r2i)
                free (self->r2i);
            if (self->rdata)
                for (i = 0; i < self->l; i++)
                    if (self->rdata[i])
                        free (self->rdata[i]);
            if (self->rdata)
                free (self->rdata);
            if (self->rsize)
                free (self->rsize);
            if (self->rdiag)
                free (self->rdiag);
            if (self->qnext)
                free (self->qnext);
            if (self->qprev)
                free (self->qprev);
            memset (self, 0, sizeof (larank_kcache_t));
            free (self);
        }
    }

    static void xminsize (larank_kcache_t * self, int32_t n)
    {
        int32_t ol = self->l;
        if (n > ol)
        {
            int32_t i;
            int32_t nl = CMath::max (256, ol);
            while (nl < n)
                nl = nl + nl;
            self->i2r = (int32_t *) xrealloc (self->i2r, nl * sizeof (int32_t));
            self->r2i = (int32_t *) xrealloc (self->r2i, nl * sizeof (int32_t));
            self->rsize = (int32_t *) xrealloc (self->rsize, nl * sizeof (int32_t));
            self->qnext = (int32_t *) xrealloc (self->qnext, (1 + nl) * sizeof (int32_t));
            self->qprev = (int32_t *) xrealloc (self->qprev, (1 + nl) * sizeof (int32_t));
            self->rdiag = (float32_t *) xrealloc (self->rdiag, nl * sizeof (float32_t));
            self->rdata = (float32_t **) xrealloc (self->rdata, nl * sizeof (float32_t *));
            self->rnext = self->qnext + 1;
            self->rprev = self->qprev + 1;
            for (i = ol; i < nl; i++)
            {
                self->i2r[i] = i;
                self->r2i[i] = i;
                self->rsize[i] = -1;
                self->rnext[i] = i;
                self->rprev[i] = i;
                self->rdata[i] = 0;
            }
            self->l = nl;
        }
    }

    static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
    {
        xminsize (self, n);
        return self->r2i;
    }

    static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
    {
        int32_t olen = self->rsize[k];
        if (nlen > olen)
        {
            float32_t *ndata = (float32_t *) xmalloc (nlen * sizeof (float32_t));
            if (olen > 0)
            {
                float32_t *odata = self->rdata[k];
                memcpy (ndata, odata, olen * sizeof (float32_t));
                free (odata);
            }
            self->rdata[k] = ndata;
            self->rsize[k] = nlen;
            self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
            self->maxrowlen = CMath::max (self->maxrowlen, nlen);
        }
    }

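    /* xswap exchanges the cache rows of examples i1 and i2 (sitting in row
       slots r1 and r2): every cached row holding an entry for one of the two
       slots has those entries swapped, recovering values from rdiag or from
       the other example's cached row where possible and truncating the row
       otherwise, before the r2i/i2r permutations are updated. */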
    static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
    {
        /* swap row data */
        if (r1 < self->maxrowlen || r2 < self->maxrowlen)
        {
            int32_t mrl = 0;
            int32_t k = self->rnext[-1];
            while (k >= 0)
            {
                int32_t nk = self->rnext[k];
                int32_t n = self->rsize[k];
                int32_t rr = self->i2r[k];
                float32_t *d = self->rdata[k];
                if (r1 < n)
                {
                    if (r2 < n)
                    {
                        float32_t t1 = d[r1];
                        float32_t t2 = d[r2];
                        d[r1] = t2;
                        d[r2] = t1;
                    }
                    else if (rr == r2)
                    {
                        d[r1] = self->rdiag[k];
                    }
                    else
                    {
                        int32_t arsize = self->rsize[i2];
                        if (rr < arsize && rr != r1)
                            d[r1] = self->rdata[i2][rr];
                        else
                            xtruncate (self, k, r1);
                    }
                }
                else if (r2 < n)
                {
                    if (rr == r1)
                    {
                        d[r2] = self->rdiag[k];
                    }
                    else
                    {
                        int32_t arsize = self->rsize[i1];
                        if (rr < arsize && rr != r2)
                            d[r2] = self->rdata[i1][rr];
                        else
                            xtruncate (self, k, r2);
                    }
                }
                mrl = CMath::max (mrl, self->rsize[k]);
                k = nk;
            }
            self->maxrowlen = mrl;
        }
        /* swap r2i and i2r */
        self->r2i[r1] = i2;
        self->r2i[r2] = i1;
        self->i2r[i1] = r2;
        self->i2r[i2] = r1;
    }

    static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
    {
        xminsize (self, 1 + CMath::max(r1, r2));
        xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
    }

    static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
    {
        xminsize (self, 1 + CMath::max (r1, i2));
        xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
    }

    static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
    {
        /* search buddies */
        larank_kcache_t *cache = self->nextbuddy;
        do
        {
            int32_t l = cache->l;
            if (i < l && j < l)
            {
                int32_t s = cache->rsize[i];
                int32_t p = cache->i2r[j];
                if (p < s)
                    return cache->rdata[i][p];
                if (i == j && s >= 0)
                    return cache->rdiag[i];
                p = cache->i2r[i];
                s = cache->rsize[j];
                if (p < s)
                    return cache->rdata[j][p];
            }
            cache = cache->nextbuddy;
        }
        while (cache != self);
        /* compute */
        return self->func->kernel(i, j);
    }


    static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
    {
        ASSERT (self);
        ASSERT (i >= 0);
        ASSERT (j >= 0);
        return xquery (self, i, j);
    }


    static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
    {
        larank_kcache_t *p = self;
        larank_kcache_t *selflast = self->prevbuddy;
        larank_kcache_t *buddylast = buddy->prevbuddy;
        /* check functions are identical */
        ASSERT (self->func == buddy->func);
        /* make sure we are not already buddies */
        do
        {
            if (p == buddy)
                return;
            p = p->nextbuddy;
        }
        while (p != self);
        /* link */
        selflast->nextbuddy = buddy;
        buddy->prevbuddy = selflast;
        buddylast->nextbuddy = self;
        self->prevbuddy = buddylast;
    }


    static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
    {
        ASSERT (i >= 0);
        if (i < self->l && len <= self->rsize[i])
        {
            self->rnext[self->rprev[i]] = self->rnext[i];
            self->rprev[self->rnext[i]] = self->rprev[i];
        }
        else
        {
            int32_t olen, p;
            float32_t *d;
            if (i >= self->l || len >= self->l)
                xminsize (self, CMath::max (1 + i, len));
            olen = self->rsize[i];
            if (olen < len)
            {
                if (olen < 0)
                {
                    self->rdiag[i] = self->func->kernel(i, i);
                    olen = self->rsize[i] = 0;
                }
                xextend (self, i, len);
                d = self->rdata[i];
                self->rsize[i] = olen;
                for (p = olen; p < len; p++)
                    d[p] = larank_kcache_query (self, self->r2i[p], i);
                self->rsize[i] = len;
            }
            self->rnext[self->rprev[i]] = self->rnext[i];
            self->rprev[self->rnext[i]] = self->rprev[i];
            xpurge (self);
        }
        self->rprev[i] = -1;
        self->rnext[i] = self->rnext[-1];
        self->rnext[self->rprev[i]] = i;
        self->rprev[self->rnext[i]] = i;
        return self->rdata[i];
    }

}

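// A minimal usage sketch of the kernel cache helpers above (illustrative
// only; the functions are file-static, and `krn` stands in for an
// initialized CKernel*):
//
//   larank_kcache_t* kc = larank_kcache_create (krn);
//   larank_kcache_set_maximum_size (kc, 64 * 1024 * 1024);   // 64 MB budget
//   float64_t kij = larank_kcache_query (kc, 3, 7);          // one entry K(3,7)
//   float32_t* row = larank_kcache_query_row (kc, 3, 100);   // first 100 cached
//                                                            // entries of row 3
//   larank_kcache_destroy (kc);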

// Initializing an output class (basically creating a kernel cache for it)
void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
{
    kernel = larank_kcache_create (kfunc);
    larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
    beta = new float32_t[1];
    g = new float32_t[1];
    *beta=0;
    *g=0;
    l = 0;
}

// Destroying an output class (basically destroying the kernel cache)
void LaRankOutput::destroy ()
{
    larank_kcache_destroy (kernel);
    kernel=NULL;
    delete[] beta;
    delete[] g;
    beta=NULL;
    g=NULL;
}

// !Important! Computing the score of a given input vector for the actual output
float64_t LaRankOutput::computeScore (int32_t x_id)
{
    if (l == 0)
        return 0;
    else
    {
        float32_t *row = larank_kcache_query_row (kernel, x_id, l);
        return CMath::dot (beta, row, l);
    }
}

// !Important! Computing the gradient of a given input vector for the actual output
float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
{
    return (yi == ythis ? 1 : 0) - computeScore (xi_id);
}
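// In other words, the gradient of output y on example x_i computed above is
// g_y(x_i) = [y_i == y] - s_y(x_i), with s_y the score of output y, as in
// Bordes et al., "Solving MultiClass Support Vector Machines with LaRank"
// (ICML 2007).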

// Updating the solution in the actual output
void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
{
    int32_t *r2i = larank_kcache_r2i (kernel, l);
    int64_t xr = l + 1;
    for (int32_t r = 0; r < l; r++)
        if (r2i[r] == x_id)
        {
            xr = r;
            break;
        }

    // updates the cache order and the beta coefficient
    if (xr < l)
    {
        beta[xr]+=lambda;
    }
    else
    {
        larank_kcache_swap_ri (kernel, l, x_id);
        CMath::resize(g, l, l+1);
        CMath::resize(beta, l, l+1);
        g[l]=gp;
        beta[l]=lambda;
        l++;
    }

    // update stored gradients
    float32_t *row = larank_kcache_query_row (kernel, x_id, l);
    for (int32_t r = 0; r < l; r++)
    {
        float64_t oldg = g[r];
        g[r]=oldg - lambda * row[r];
    }
}

// Linking the cache of this output to the cache of another "buddy" output
// so that if a requested value is not found in this cache, you can ask your buddy if it has it.
void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
{
    larank_kcache_set_buddy (bud, kernel);
}

// Removing useless support vectors (for which beta=0)
int32_t LaRankOutput::cleanup ()
{
    int32_t count = 0;
    std::vector < int32_t >idx;
    for (int32_t x = 0; x < l; x++)
    {
        if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
        {
            idx.push_back (x);
            count++;
        }
    }
    int32_t new_l = l - count;
    for (int32_t xx = 0; xx < count; xx++)
    {
        int32_t i = idx[xx] - xx;
        for (int32_t r = i; r < (l - 1); r++)
        {
            larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
            beta[r]=beta[r + 1];
            g[r]=g[r + 1];
        }
    }
    CMath::resize(beta, l, new_l+1);
    CMath::resize(g, l, new_l+1);
    beta[new_l]=0;
    g[new_l]=0;
    l = new_l;
    return count;
}

// --- Below are information or "get" functions --- //
//
float64_t LaRankOutput::getW2 ()
{
    float64_t sum = 0;
    int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
    for (int32_t r = 0; r < l; r++)
    {
        float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
        sum += beta[r] * CMath::dot (beta, row_r, l);
    }
    return sum;
}

float64_t LaRankOutput::getKii (int32_t x_id)
{
    return larank_kcache_query (kernel, x_id, x_id);
}

//
float64_t LaRankOutput::getBeta (int32_t x_id)
{
    int32_t *r2i = larank_kcache_r2i (kernel, l);
    int32_t xr = -1;
    for (int32_t r = 0; r < l; r++)
        if (r2i[r] == x_id)
        {
            xr = r;
            break;
        }
    return (xr < 0 ? 0 : beta[xr]);
}

//
float64_t LaRankOutput::getGradient (int32_t x_id)
{
    int32_t *r2i = larank_kcache_r2i (kernel, l);
    int32_t xr = -1;
    for (int32_t r = 0; r < l; r++)
        if (r2i[r] == x_id)
        {
            xr = r;
            break;
        }
    return (xr < 0 ? 0 : g[xr]);
}

bool LaRankOutput::isSupportVector (int32_t x_id) const
{
    int32_t *r2i = larank_kcache_r2i (kernel, l);
    int32_t xr = -1;
    for (int32_t r = 0; r < l; r++)
        if (r2i[r] == x_id)
        {
            xr = r;
            break;
        }
    return (xr >= 0);
}

//
int32_t LaRankOutput::getSV (float32_t* &sv) const
{
    sv=new float32_t[l];
    int32_t *r2i = larank_kcache_r2i (kernel, l);
    for (int32_t r = 0; r < l; r++)
        sv[r]=r2i[r];
    return l;
}

CLaRank::CLaRank (): CMultiClassSVM(ONE_VS_REST),
    nb_seen_examples (0), nb_removed (0),
    n_pro (0), n_rep (0), n_opt (0),
    w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
    batch_mode(true), step(0)
{
}

CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
    CMultiClassSVM(ONE_VS_REST, C, k, lab),
    nb_seen_examples (0), nb_removed (0),
    n_pro (0), n_rep (0), n_opt (0),
    w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
    batch_mode(true), step(0)
{
}

CLaRank::~CLaRank ()
{
    destroy();
}

bool CLaRank::train(CFeatures* data)
{
    tau = 0.0001;

    ASSERT(kernel);
    ASSERT(labels && labels->get_num_labels());

    CSignal::clear_cancel();

    if (data)
    {
        if (data->get_num_vectors() != labels->get_num_labels())
        {
            SG_ERROR("Number of vectors (%d) does not match number of labels (%d)\n",
                    data->get_num_vectors(), labels->get_num_labels());
        }
        kernel->init(data, data);
    }

    ASSERT(kernel->get_num_vec_lhs() && kernel->get_num_vec_rhs());

    nb_train=labels->get_num_labels();
    cache = kernel->get_cache_size();

    int32_t n_it = 1;
    float64_t gap = DBL_MAX;

    SG_INFO("Training on %d examples\n", nb_train);
    while (gap > C1 && (!CSignal::cancel_computations()))      // stopping criteria
    {
        float64_t tr_err = 0;
        int32_t ind = step;
        for (int32_t i = 0; i < nb_train; i++)
        {
            int32_t y=labels->get_label(i);
            if (add (i, y) != y)   // call the add function
                tr_err++;

            if (ind && i / ind)
            {
                SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
                        (int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
                ind += step;
            }
        }

        SG_DEBUG("End of iteration %d\n", n_it++);
        SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
        gap = computeGap ();
        SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(C1), 6);

        if (!batch_mode)        // skip stopping criteria if online mode
            gap = 0;
    }
    SG_DONE();

    int32_t num_classes = outputs.size();
    create_multiclass_svm(num_classes);
    SG_DEBUG("%d classes\n", num_classes);

    // Used for saving a model file
    int32_t i=0;
    for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
    {
        const LaRankOutput* o=&(it->second);

        larank_kcache_t* k=o->getKernel();
        int32_t l=o->get_l();
        float32_t* beta=o->getBetas();
        int32_t *r2i = larank_kcache_r2i (k, l);

        ASSERT(l>0);
        SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);

        CSVM* svm=new CSVM(l);

        for (int32_t j=0; j<l; j++)
        {
            svm->set_alpha(j, beta[j]);
            svm->set_support_vector(j, r2i[j]);
        }

        svm->set_bias(0);
        set_svm(i, svm);
        i++;
    }
    destroy();

    return true;
}
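
// A minimal training sketch (illustrative; `kern` and `lab` stand in for an
// initialized CKernel* and CLabels*, `feats` for the training CFeatures*,
// and C=1.0 is the regularization constant):
//
//   CLaRank* machine = new CLaRank (1.0, kern, lab);
//   machine->train (feats);              // batch mode: iterates until the
//                                        // duality gap drops below C
//   int32_t yhat = machine->predict (0); // predicted class of example 0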

// LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptive schedule
int32_t CLaRank::add (int32_t x_id, int32_t yi)
{
    ++nb_seen_examples;
    // create a new output object if this one has never been seen before
    if (!getOutput (yi))
    {
        outputs.insert (std::make_pair (yi, LaRankOutput ()));
        LaRankOutput *cur = getOutput (yi);
        cur->initialize (kernel, cache);
        if (outputs.size () == 1)
            y0 = outputs.begin ()->first;
        // link the cache of this new output to a buddy
        if (outputs.size () > 1)
        {
            LaRankOutput *out0 = getOutput (y0);
            cur->set_kernel_buddy (out0->getKernel ());
        }
    }

    LaRankPattern tpattern (x_id, yi);
    LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;

    // ProcessNew with the "fresh" pattern
    float64_t time1 = CTime::get_curtime();
    process_return_t pro_ret = process (pattern, processNew);
    float64_t dual_increase = pro_ret.dual_increase;
    float64_t duration = (CTime::get_curtime() - time1);
    float64_t coeff = dual_increase / (0.00001 + duration);
    dual += dual_increase;
    n_pro++;
    w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;

    // ProcessOld & Optimize until ready for a new ProcessNew
    // (Adaptive schedule here)
    for (;;)
    {
        float64_t w_sum = w_pro + w_rep + w_opt;
        float64_t prop_min = w_sum / 20;
        if (w_pro < prop_min)
            w_pro = prop_min;
        if (w_rep < prop_min)
            w_rep = prop_min;
        if (w_opt < prop_min)
            w_opt = prop_min;
        w_sum = w_pro + w_rep + w_opt;
        float64_t r = CMath::random(0.0, w_sum);
        if (r <= w_pro)
        {
            break;
        }
        else if ((r > w_pro) && (r <= w_pro + w_rep))   // ProcessOld here
        {
            float64_t ltime1 = CTime::get_curtime ();
            float64_t ldual_increase = reprocess ();
            float64_t lduration = (CTime::get_curtime () - ltime1);
            float64_t lcoeff = ldual_increase / (0.00001 + lduration);
            dual += ldual_increase;
            n_rep++;
            w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
        }
        else            // Optimize here
        {
            float64_t ltime1 = CTime::get_curtime ();
            float64_t ldual_increase = optimize ();
            float64_t lduration = (CTime::get_curtime () - ltime1);
            float64_t lcoeff = ldual_increase / (0.00001 + lduration);
            dual += ldual_increase;
            n_opt++;
            w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
        }
    }
    if (nb_seen_examples % 100 == 0)    // Cleanup useless Support Vectors/Patterns sometimes
        nb_removed += cleanup ();
    return pro_ret.ypred;
}
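
// The schedule above keeps, for each operation type (ProcessNew, ProcessOld,
// Optimize), an exponential moving average of its dual increase per second
// (w = 0.05 * rate + 0.95 * w), floors every weight at 5% of their sum, and
// draws the next operation with probability proportional to its weight,
// stopping once ProcessNew is drawn again.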

// PREDICTION FUNCTION: main function in la_rank_classify
int32_t CLaRank::predict (int32_t x_id)
{
    int32_t res = -1;
    float64_t score_max = -DBL_MAX;
    for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
    {
        float64_t score = it->second.computeScore (x_id);
        if (score > score_max)
        {
            score_max = score;
            res = it->first;
        }
    }
    return res;
}

void CLaRank::destroy ()
{
    for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
        it->second.destroy ();
}


// Compute Duality gap (costly but used in stopping criteria in batch mode)
float64_t CLaRank::computeGap ()
{
    float64_t sum_sl = 0;
    float64_t sum_bi = 0;
    for (uint32_t i = 0; i < patterns.maxcount (); ++i)
    {
        const LaRankPattern & p = patterns[i];
        if (!p.exists ())
            continue;
        LaRankOutput *out = getOutput (p.y);
        if (!out)
            continue;
        sum_bi += out->getBeta (p.x_id);
        float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
        float64_t gmin = DBL_MAX;
        for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
        {
            if (it->first != p.y && it->second.isSupportVector (p.x_id))
            {
                float64_t g =
                    it->second.computeGradient (p.x_id, p.y, it->first);
                if (g < gmin)
                    gmin = g;
            }
        }
        sum_sl += CMath::max (0.0, gi - gmin);
    }
    return CMath::max (0.0, computeW2 () + C1 * sum_sl - sum_bi);
}
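
// In formula form, the value returned above is the duality gap
//   max(0, ||w||^2 + C * sum_i max(0, g_{y_i}(x_i) - min_{y != y_i} g_y(x_i)) - sum_i beta_i),
// i.e. the difference between the primal and dual objectives that training
// drives below C before stopping.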

// Number of classes so far
uint32_t CLaRank::getNumOutputs () const
{
    return outputs.size ();
}

// Number of Support Vectors
int32_t CLaRank::getNSV ()
{
    int32_t res = 0;
    for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
    {
        float32_t* sv=NULL;
        res += it->second.getSV (sv);
        delete[] sv;
    }
    return res;
}

// Norm of the parameters vector
float64_t CLaRank::computeW2 ()
{
    float64_t res = 0;
    for (uint32_t i = 0; i < patterns.maxcount (); ++i)
    {
        const LaRankPattern & p = patterns[i];
        if (!p.exists ())
            continue;
        for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
            if (it->second.getBeta (p.x_id))
                res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
    }
    return res;
}

// Compute Dual objective value
float64_t CLaRank::getDual ()
{
    float64_t res = 0;
    for (uint32_t i = 0; i < patterns.maxcount (); ++i)
    {
        const LaRankPattern & p = patterns[i];
        if (!p.exists ())
            continue;
        LaRankOutput *out = getOutput (p.y);
        if (!out)
            continue;
        res += out->getBeta (p.x_id);
    }
    return res - computeW2 () / 2;
}

LaRankOutput *CLaRank::getOutput (int32_t index)
{
    outputhash_t::iterator it = outputs.find (index);
    return it == outputs.end ()? NULL : &it->second;
}

// IMPORTANT Main SMO optimization step
CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
{
    process_return_t pro_ret = process_return_t (0, 0);

    /*
     ** compute gradient and sort
     */
    std::vector < outputgradient_t > outputgradients;

    outputgradients.reserve (getNumOutputs ());

    std::vector < outputgradient_t > outputscores;
    outputscores.reserve (getNumOutputs ());

    for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
    {
        if (ptype != processOptimize
                || it->second.isSupportVector (pattern.x_id))
        {
            float64_t g =
                it->second.computeGradient (pattern.x_id, pattern.y, it->first);
            outputgradients.push_back (outputgradient_t (it->first, g));
            if (it->first == pattern.y)
                outputscores.push_back (outputgradient_t (it->first, (1 - g)));
            else
                outputscores.push_back (outputgradient_t (it->first, -g));
        }
    }

    std::sort (outputgradients.begin (), outputgradients.end ());

    /*
     ** determine the prediction
     */
    std::sort (outputscores.begin (), outputscores.end ());
    pro_ret.ypred = outputscores[0].output;

    /*
     ** Find yp (1st part of the pair)
     */
    outputgradient_t ygp;
    LaRankOutput *outp = NULL;
    uint32_t p;
    for (p = 0; p < outputgradients.size (); ++p)
    {
        outputgradient_t & current = outputgradients[p];
        LaRankOutput *output = getOutput (current.output);
        bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
        bool goodclass = current.output == pattern.y;
        if ((!support && goodclass) ||
                (support && output->getBeta (pattern.x_id) < (goodclass ? C1 : 0)))
        {
            ygp = current;
            outp = output;
            break;
        }
    }
    if (p == outputgradients.size ())
        return pro_ret;

    /*
     ** Find ym (2nd part of the pair)
     */
    outputgradient_t ygm;
    LaRankOutput *outm = NULL;
    int32_t m;
    for (m = outputgradients.size () - 1; m >= 0; --m)
    {
        outputgradient_t & current = outputgradients[m];
        LaRankOutput *output = getOutput (current.output);
        bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
        bool goodclass = current.output == pattern.y;
        if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
        {
            ygm = current;
            outm = output;
            break;
        }
    }
    if (m < 0)
        return pro_ret;

    /*
     ** Throw or Insert pattern
     */
    if ((ygp.gradient - ygm.gradient) < tau)
        return pro_ret;
    if (ptype == processNew)
        patterns.insert (pattern);

    /*
     ** compute lambda and clip it
     */
    float64_t kii = outp->getKii (pattern.x_id);
    float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
    if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
    {
        float64_t beta = outp->getBeta (pattern.x_id);
        if (ygp.output == pattern.y)
            lambda = CMath::min (lambda, C1 - beta);
        else
            lambda = CMath::min (lambda, fabs (beta));
    }
    else
        lambda = CMath::min (lambda, C1);

    /*
     ** update the solution
     */
    outp->update (pattern.x_id, lambda, ygp.gradient);
    outm->update (pattern.x_id, -lambda, ygm.gradient);

    pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
    return pro_ret;
}
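
// The elementary step above solves the dual problem restricted to the pair
// (yp, ym) in closed form: the unclipped step size is
//   lambda = (g_p - g_m) / (2 * K(x,x)),
// it is clipped to the box constraints on beta, and the resulting increase
// of the dual objective is lambda * ((g_p - g_m) - lambda * K(x,x)).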

// ProcessOld
float64_t CLaRank::reprocess ()
{
    if (patterns.size ())
    {
        for (int32_t n = 0; n < 10; ++n)
        {
            process_return_t pro_ret = process (patterns.sample (), processOld);
            if (pro_ret.dual_increase)
                return pro_ret.dual_increase;
        }
    }
    return 0;
}

// Optimize
float64_t CLaRank::optimize ()
{
    float64_t dual_increase = 0;
    if (patterns.size ())
    {
        for (int32_t n = 0; n < 10; ++n)
        {
            process_return_t pro_ret =
                process (patterns.sample(), processOptimize);
            dual_increase += pro_ret.dual_increase;
        }
    }
    return dual_increase;
}

// remove patterns and return the number of patterns that were removed
uint32_t CLaRank::cleanup ()
{
    /*
    for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
        it->second.cleanup ();

    uint32_t res = 0;
    for (uint32_t i = 0; i < patterns.size (); ++i)
    {
        LaRankPattern & p = patterns[i];
        if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
        {
            patterns.remove (i);
            ++res;
        }
    }
    return res;
    */
    return 0;
}