LaRank.cpp

00001 // -*- C++ -*-
00002 // Main functions of the LaRank algorithm for solving Multiclass SVM
00003 // Copyright (C) 2008- Antoine Bordes
00004 // Shogun specific adjustments (w) 2009 Soeren Sonnenburg
00005 
00006 // This library is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 2.1 of the License, or (at your option) any later version.
00010 //
00011 // This program is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 //
00016 // You should have received a copy of the GNU General Public License
00017 // along with this program; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
00019 //
00020 /***********************************************************************
00021  *
00022  *  LUSH Lisp Universal Shell
00023  *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
00024  *  Includes parts of TL3:
00025  *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.
00026  *  Includes selected parts of SN3.2:
00027  *    Copyright (C) 1991-2001 AT&T Corp.
00028  *
00029  *  This program is free software; you can redistribute it and/or modify
00030  *  it under the terms of the GNU General Public License as published by
00031  *  the Free Software Foundation; either version 2 of the License, or
00032  *  (at your option) any later version.
00033  *
00034  *  This program is distributed in the hope that it will be useful,
00035  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00036  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00037  *  GNU General Public License for more details.
00038  *
00039  *  You should have received a copy of the GNU General Public License
00040  *  along with this program; if not, write to the Free Software
00041  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
00042  *
00043  ***********************************************************************/
00044 
00045 /***********************************************************************
00046  * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
00047  **********************************************************************/
00048 
00049 #include <vector>
00050 #include <algorithm>
00051 
00052 #include <shogun/io/SGIO.h>
00053 #include <shogun/lib/Signal.h>
00054 #include <shogun/lib/Time.h>
00055 #include <shogun/mathematics/Math.h>
00056 #include <shogun/multiclass/LaRank.h>
00057 #include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
00058 #include <shogun/kernel/Kernel.h>
00059 #include <shogun/labels/MulticlassLabels.h>
00060 
00061 using namespace shogun;
00062 
00063 namespace shogun
00064 {
00065     static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
00066     {
00067         larank_kcache_t *self;
00068         self = SG_CALLOC (larank_kcache_t, 1);
00069         self->l = 0;
00070         self->maxrowlen = 0;
00071         self->func = kernelfunc;
00072         self->prevbuddy = self;
00073         self->nextbuddy = self;
00074         self->cursize = sizeof (larank_kcache_t);
00075         self->maxsize = 256 * 1024 * 1024;
00076         self->qprev = SG_MALLOC(int32_t, 1);
00077         self->qnext = SG_MALLOC(int32_t, 1);
00078         self->rnext = self->qnext + 1;
00079         self->rprev = self->qprev + 1;
00080         self->rprev[-1] = -1;
00081         self->rnext[-1] = -1;
00082         return self;
00083     }
00084 
00085     static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
00086     {
00087         int32_t olen = self->rsize[k];
00088         if (nlen < olen)
00089         {
00090             float32_t *ndata;
00091             float32_t *odata = self->rdata[k];
00092             if (nlen > 0)
00093             {
00094                 ndata = SG_MALLOC(float32_t, nlen);
00095                 memcpy (ndata, odata, nlen * sizeof (float32_t));
00096             }
00097             else
00098             {
00099                 ndata = 0;
00100                 self->rnext[self->rprev[k]] = self->rnext[k];
00101                 self->rprev[self->rnext[k]] = self->rprev[k];
00102                 self->rnext[k] = self->rprev[k] = k;
00103             }
00104             SG_FREE (odata);
00105             self->rdata[k] = ndata;
00106             self->rsize[k] = nlen;
00107             self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
00108         }
00109     }
00110 
00111     static void xpurge (larank_kcache_t * self)
00112     {
00113         if (self->cursize > self->maxsize)
00114         {
00115             int32_t k = self->rprev[-1];
00116             while (self->cursize > self->maxsize && k != self->rnext[-1])
00117             {
00118                 int32_t pk = self->rprev[k];
00119                 xtruncate (self, k, 0);
00120                 k = pk;
00121             }
00122         }
00123     }
00124 
00125     static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
00126     {
00127         ASSERT (self);
00128         ASSERT (entries > 0);
00129         self->maxsize = entries;
00130         xpurge (self);
00131     }
00132 
00133     static void larank_kcache_destroy (larank_kcache_t * self)
00134     {
00135         if (self)
00136         {
00137             int32_t i;
00138             larank_kcache_t *nb = self->nextbuddy;
00139             larank_kcache_t *pb = self->prevbuddy;
00140             pb->nextbuddy = nb;
00141             nb->prevbuddy = pb;
00142             /* delete */
00143             if (self->i2r)
00144                 SG_FREE (self->i2r);
00145             if (self->r2i)
00146                 SG_FREE (self->r2i);
00147             if (self->rdata)
00148                 for (i = 0; i < self->l; i++)
00149                     if (self->rdata[i])
00150                         SG_FREE (self->rdata[i]);
00151             if (self->rdata)
00152                 SG_FREE (self->rdata);
00153             if (self->rsize)
00154                 SG_FREE (self->rsize);
00155             if (self->rdiag)
00156                 SG_FREE (self->rdiag);
00157             if (self->qnext)
00158                 SG_FREE (self->qnext);
00159             if (self->qprev)
00160                 SG_FREE (self->qprev);
00161             memset (self, 0, sizeof (larank_kcache_t));
00162             SG_FREE (self);
00163         }
00164     }
00165 
00166     static void xminsize (larank_kcache_t * self, int32_t n)
00167     {
00168         int32_t ol = self->l;
00169         if (n > ol)
00170         {
00171             int32_t i;
00172             int32_t nl = CMath::max (256, ol);
00173             while (nl < n)
00174                 nl = nl + nl;
00175             self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
00176             self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
00177             self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
00178             self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
00179             self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
00180             self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
00181             self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
00182             self->rnext = self->qnext + 1;
00183             self->rprev = self->qprev + 1;
00184             for (i = ol; i < nl; i++)
00185             {
00186                 self->i2r[i] = i;
00187                 self->r2i[i] = i;
00188                 self->rsize[i] = -1;
00189                 self->rnext[i] = i;
00190                 self->rprev[i] = i;
00191                 self->rdata[i] = 0;
00192             }
00193             self->l = nl;
00194         }
00195     }
00196 
00197     static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
00198     {
00199         xminsize (self, n);
00200         return self->r2i;
00201     }
00202 
00203     static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
00204     {
00205         int32_t olen = self->rsize[k];
00206         if (nlen > olen)
00207         {
00208             float32_t *ndata = SG_MALLOC(float32_t, nlen);
00209             if (olen > 0)
00210             {
00211                 float32_t *odata = self->rdata[k];
00212                 memcpy (ndata, odata, olen * sizeof (float32_t));
00213                 SG_FREE (odata);
00214             }
00215             self->rdata[k] = ndata;
00216             self->rsize[k] = nlen;
00217             self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
00218             self->maxrowlen = CMath::max (self->maxrowlen, nlen);
00219         }
00220     }
00221 
00222     static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
00223     {
00224         /* swap row data */
00225         if (r1 < self->maxrowlen || r2 < self->maxrowlen)
00226         {
00227             int32_t mrl = 0;
00228             int32_t k = self->rnext[-1];
00229             while (k >= 0)
00230             {
00231                 int32_t nk = self->rnext[k];
00232                 int32_t n = self->rsize[k];
00233                 int32_t rr = self->i2r[k];
00234                 float32_t *d = self->rdata[k];
00235                 if (r1 < n)
00236                 {
00237                     if (r2 < n)
00238                     {
00239                         float32_t t1 = d[r1];
00240                         float32_t t2 = d[r2];
00241                         d[r1] = t2;
00242                         d[r2] = t1;
00243                     }
00244                     else if (rr == r2)
00245                     {
00246                         d[r1] = self->rdiag[k];
00247                     }
00248                     else
00249                     {
00250                         int32_t arsize = self->rsize[i2];
00251                         if (rr < arsize && rr != r1)
00252                             d[r1] = self->rdata[i2][rr];
00253                         else
00254                             xtruncate (self, k, r1);
00255                     }
00256                 }
00257                 else if (r2 < n)
00258                 {
00259                     if (rr == r1)
00260                     {
00261                         d[r2] = self->rdiag[k];
00262                     }
00263                     else
00264                     {
00265                         int32_t arsize = self->rsize[i1];
00266                         if (rr < arsize && rr != r2)
00267                             d[r2] = self->rdata[i1][rr];
00268                         else
00269                             xtruncate (self, k, r2);
00270                     }
00271                 }
00272                 mrl = CMath::max (mrl, self->rsize[k]);
00273                 k = nk;
00274             }
00275             self->maxrowlen = mrl;
00276         }
00277         /* swap r2i and i2r */
00278         self->r2i[r1] = i2;
00279         self->r2i[r2] = i1;
00280         self->i2r[i1] = r2;
00281         self->i2r[i2] = r1;
00282     }
00283 
00284     static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
00285     {
00286         xminsize (self, 1 + CMath::max(r1, r2));
00287         xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
00288     }
00289 
00290     static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
00291     {
00292         xminsize (self, 1 + CMath::max (r1, i2));
00293         xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
00294     }
00295 
00296     static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
00297     {
00298         /* search buddies */
00299         larank_kcache_t *cache = self->nextbuddy;
00300         do
00301         {
00302             int32_t l = cache->l;
00303             if (i < l && j < l)
00304             {
00305                 int32_t s = cache->rsize[i];
00306                 int32_t p = cache->i2r[j];
00307                 if (p < s)
00308                     return cache->rdata[i][p];
00309                 if (i == j && s >= 0)
00310                     return cache->rdiag[i];
00311                 p = cache->i2r[i];
00312                 s = cache->rsize[j];
00313                 if (p < s)
00314                     return cache->rdata[j][p];
00315             }
00316             cache = cache->nextbuddy;
00317         }
00318         while (cache != self);
00319         /* compute */
00320         return self->func->kernel(i, j);
00321     }
00322 
00323 
00324     static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
00325     {
00326         ASSERT (self);
00327         ASSERT (i >= 0);
00328         ASSERT (j >= 0);
00329         return xquery (self, i, j);
00330     }
00331 
00332 
00333     static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
00334     {
00335         larank_kcache_t *p = self;
00336         larank_kcache_t *selflast = self->prevbuddy;
00337         larank_kcache_t *buddylast = buddy->prevbuddy;
00338         /* check functions are identical */
00339         ASSERT (self->func == buddy->func);
00340         /* make sure we are not already buddies */
00341         do
00342         {
00343             if (p == buddy)
00344                 return;
00345             p = p->nextbuddy;
00346         }
00347         while (p != self);
00348         /* link */
00349         selflast->nextbuddy = buddy;
00350         buddy->prevbuddy = selflast;
00351         buddylast->nextbuddy = self;
00352         self->prevbuddy = buddylast;
00353     }
00354 
00355 
00356     static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
00357     {
00358         ASSERT (i >= 0);
00359         if (i < self->l && len <= self->rsize[i])
00360         {
00361             self->rnext[self->rprev[i]] = self->rnext[i];
00362             self->rprev[self->rnext[i]] = self->rprev[i];
00363         }
00364         else
00365         {
00366             int32_t olen, p;
00367             float32_t *d;
00368             if (i >= self->l || len >= self->l)
00369                 xminsize (self, CMath::max (1 + i, len));
00370             olen = self->rsize[i];
00371             if (olen < len)
00372             {
00373                 if (olen < 0)
00374                 {
00375                     self->rdiag[i] = self->func->kernel(i, i);
00376                     olen = self->rsize[i] = 0;
00377                 }
00378                 xextend (self, i, len);
00379                 d = self->rdata[i];
00380                 self->rsize[i] = olen;
00381                 for (p = olen; p < len; p++)
00382                     d[p] = larank_kcache_query (self, self->r2i[p], i);
00383                 self->rsize[i] = len;
00384             }
00385             self->rnext[self->rprev[i]] = self->rnext[i];
00386             self->rprev[self->rnext[i]] = self->rprev[i];
00387             xpurge (self);
00388         }
00389         self->rprev[i] = -1;
00390         self->rnext[i] = self->rnext[-1];
00391         self->rnext[self->rprev[i]] = i;
00392         self->rprev[self->rnext[i]] = i;
00393         return self->rdata[i];
00394     }
00395 
00396 }
00397 
00398 
00399 // Initializing an output class (basically creating a kernel cache for it)
00400 void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
00401 {
00402     kernel = larank_kcache_create (kfunc);
00403     larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
00404     beta = SG_MALLOC(float32_t, 1);
00405     g = SG_MALLOC(float32_t, 1);
00406     *beta=0;
00407     *g=0;
00408     l = 0;
00409 }
00410 
00411 // Destroying an output class (basically destroying the kernel cache)
00412 void LaRankOutput::destroy ()
00413 {
00414     larank_kcache_destroy (kernel);
00415     kernel=NULL;
00416     SG_FREE(beta);
00417     SG_FREE(g);
00418     beta=NULL;
00419     g=NULL;
00420 }
00421 
00422 // !Important! Computing the score of a given input vector for the current output
00423 float64_t LaRankOutput::computeScore (int32_t x_id)
00424 {
00425     if (l == 0)
00426         return 0;
00427     else
00428     {
00429         float32_t *row = larank_kcache_query_row (kernel, x_id, l);
00430         return SGVector<float32_t>::dot (beta, row, l);
00431     }
00432 }
00433 
00434 // !Important! Computing the gradient of a given input vector for the current output
00435 float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
00436 {
00437     return (yi == ythis ? 1 : 0) - computeScore (xi_id);
00438 }
00439 
00440 // Updating the solution in the current output
00441 void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
00442 {
00443     int32_t *r2i = larank_kcache_r2i (kernel, l);
00444     int64_t xr = l + 1;
00445     for (int32_t r = 0; r < l; r++)
00446         if (r2i[r] == x_id)
00447         {
00448             xr = r;
00449             break;
00450         }
00451 
00452     // updates the cache order and the beta coefficient
00453     if (xr < l)
00454     {
00455         beta[xr]+=lambda;
00456     }
00457     else
00458     {
00459         larank_kcache_swap_ri (kernel, l, x_id);
00460         SGVector<float32_t>::resize(g, l, l+1);
00461         SGVector<float32_t>::resize(beta, l, l+1);
00462         g[l]=gp;
00463         beta[l]=lambda;
00464         l++;
00465     }
00466 
00467     // update stored gradients
00468     float32_t *row = larank_kcache_query_row (kernel, x_id, l);
00469     for (int32_t r = 0; r < l; r++)
00470     {
00471         float64_t oldg = g[r];
00472         g[r]=oldg - lambda * row[r];
00473     }
00474 }
00475 
00476 // Linking the cache of this output to the cache of another "buddy" output
00477 // so that if a requested value is not found in this cache, you can ask your buddy if it has it.
00478 void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
00479 {
00480     larank_kcache_set_buddy (bud, kernel);
00481 }
00482 
00483 // Removing useless support vectors (for which beta=0)
00484 int32_t LaRankOutput::cleanup ()
00485 {
00486     int32_t count = 0;
00487     std::vector < int32_t >idx;
00488     for (int32_t x = 0; x < l; x++)
00489     {
00490         if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
00491         {
00492             idx.push_back (x);
00493             count++;
00494         }
00495     }
00496     int32_t new_l = l - count;
00497     for (int32_t xx = 0; xx < count; xx++)
00498     {
00499         int32_t i = idx[xx] - xx;
00500         for (int32_t r = i; r < (l - 1); r++)
00501         {
00502             larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
00503             beta[r]=beta[r + 1];
00504             g[r]=g[r + 1];
00505         }
00506     }
00507     SGVector<float32_t>::resize(beta, l, new_l+1);
00508     SGVector<float32_t>::resize(g, l, new_l+1);
00509     beta[new_l]=0;
00510     g[new_l]=0;
00511     l = new_l;
00512     return count;
00513 }
00514 
00515 // --- Below are informational or "get" functions --- //
00516 //
00517 float64_t LaRankOutput::getW2 ()
00518 {
00519     float64_t sum = 0;
00520     int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
00521     for (int32_t r = 0; r < l; r++)
00522     {
00523         float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
00524         sum += beta[r] * SGVector<float32_t>::dot (beta, row_r, l);
00525     }
00526     return sum;
00527 }
00528 
00529 float64_t LaRankOutput::getKii (int32_t x_id)
00530 {
00531     return larank_kcache_query (kernel, x_id, x_id);
00532 }
00533 
00534 //
00535 float64_t LaRankOutput::getBeta (int32_t x_id)
00536 {
00537     int32_t *r2i = larank_kcache_r2i (kernel, l);
00538     int32_t xr = -1;
00539     for (int32_t r = 0; r < l; r++)
00540         if (r2i[r] == x_id)
00541         {
00542             xr = r;
00543             break;
00544         }
00545     return (xr < 0 ? 0 : beta[xr]);
00546 }
00547 
00548 //
00549 float64_t LaRankOutput::getGradient (int32_t x_id)
00550 {
00551     int32_t *r2i = larank_kcache_r2i (kernel, l);
00552     int32_t xr = -1;
00553     for (int32_t r = 0; r < l; r++)
00554         if (r2i[r] == x_id)
00555         {
00556             xr = r;
00557             break;
00558         }
00559     return (xr < 0 ? 0 : g[xr]);
00560 }
00561 bool LaRankOutput::isSupportVector (int32_t x_id) const
00562 {
00563     int32_t *r2i = larank_kcache_r2i (kernel, l);
00564     int32_t xr = -1;
00565     for (int32_t r = 0; r < l; r++)
00566         if (r2i[r] == x_id)
00567         {
00568             xr = r;
00569             break;
00570         }
00571     return (xr >= 0);
00572 }
00573 
00574 //
00575 int32_t LaRankOutput::getSV (float32_t* &sv) const
00576 {
00577     sv=SG_MALLOC(float32_t, l);
00578     int32_t *r2i = larank_kcache_r2i (kernel, l);
00579     for (int32_t r = 0; r < l; r++)
00580         sv[r]=r2i[r];
00581     return l;
00582 }
00583 
00584 CLaRank::CLaRank (): CMulticlassSVM(new CMulticlassOneVsRestStrategy()),
00585     nb_seen_examples (0), nb_removed (0),
00586     n_pro (0), n_rep (0), n_opt (0),
00587     w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
00588     batch_mode(true), step(0)
00589 {
00590 }
00591 
00592 CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
00593     CMulticlassSVM(new CMulticlassOneVsRestStrategy(), C, k, lab),
00594     nb_seen_examples (0), nb_removed (0),
00595     n_pro (0), n_rep (0), n_opt (0),
00596     w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
00597     batch_mode(true), step(0)
00598 {
00599 }
00600 
00601 CLaRank::~CLaRank ()
00602 {
00603     destroy();
00604 }
00605 
00606 bool CLaRank::train_machine(CFeatures* data)
00607 {
00608     tau = 0.0001;
00609 
00610     ASSERT(m_kernel);
00611     ASSERT(m_labels && m_labels->get_num_labels());
00612     ASSERT(m_labels->get_label_type() == LT_MULTICLASS);
00613 
00614     CSignal::clear_cancel();
00615 
00616     if (data)
00617     {
00618         if (data->get_num_vectors() != m_labels->get_num_labels())
00619         {
00620             SG_ERROR("Number of vectors (%d) does not match number of labels (%d)\n",
00621                     data->get_num_vectors(), m_labels->get_num_labels());
00622         }
00623         m_kernel->init(data, data);
00624     }
00625 
00626     ASSERT(m_kernel->get_num_vec_lhs() && m_kernel->get_num_vec_rhs());
00627 
00628     nb_train=m_labels->get_num_labels();
00629     cache = m_kernel->get_cache_size();
00630 
00631     int32_t n_it = 1;
00632     float64_t gap = DBL_MAX;
00633 
00634     SG_INFO("Training on %d examples\n", nb_train);
00635     while (gap > get_C() && (!CSignal::cancel_computations()))      // stopping criteria
00636     {
00637         float64_t tr_err = 0;
00638         int32_t ind = step;
00639         for (int32_t i = 0; i < nb_train; i++)
00640         {
00641             int32_t y=((CMulticlassLabels*) m_labels)->get_label(i);
00642             if (add (i, y) != y)   // call the add function
00643                 tr_err++;
00644 
00645             if (ind && i / ind)
00646             {
00647                 SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
00648                         (int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
00649                 ind += step;
00650             }
00651         }
00652 
00653         SG_DEBUG("End of iteration %d\n", n_it++);
00654         SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
00655         gap = computeGap ();
00656         SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(get_C()), 6);
00657 
00658         if (!batch_mode)        // skip stopping criteria if online mode
00659             gap = 0;
00660     }
00661     SG_DONE();
00662 
00663     int32_t num_classes = outputs.size();
00664     create_multiclass_svm(num_classes);
00665     SG_DEBUG("%d classes\n", num_classes);
00666 
00667     // Used for saving a model file
00668     int32_t i=0;
00669     for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
00670     {
00671         const LaRankOutput* o=&(it->second);
00672 
00673         larank_kcache_t* k=o->getKernel();
00674         int32_t l=o->get_l();
00675         float32_t* beta=o->getBetas();
00676         int32_t *r2i = larank_kcache_r2i (k, l);
00677 
00678         ASSERT(l>0);
00679         SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);
00680 
00681         CSVM* svm=new CSVM(l);
00682 
00683         for (int32_t j=0; j<l; j++)
00684         {
00685             svm->set_alpha(j, beta[j]);
00686             svm->set_support_vector(j, r2i[j]);
00687         }
00688 
00689         svm->set_bias(0);
00690         set_svm(i, svm);
00691         i++;
00692     }
00693     destroy();
00694 
00695     return true;
00696 }
00697 
00698 // LEARNING FUNCTION: add new patterns and run optimization steps selected with an adaptive schedule
00699 int32_t CLaRank::add (int32_t x_id, int32_t yi)
00700 {
00701     ++nb_seen_examples;
00702     // create a new output object if this one has never been seen before
00703     if (!getOutput (yi))
00704     {
00705         outputs.insert (std::make_pair (yi, LaRankOutput ()));
00706         LaRankOutput *cur = getOutput (yi);
00707         cur->initialize (m_kernel, cache);
00708         if (outputs.size () == 1)
00709             y0 = outputs.begin ()->first;
00710         // link the cache of this new output to a buddy
00711         if (outputs.size () > 1)
00712         {
00713             LaRankOutput *out0 = getOutput (y0);
00714             cur->set_kernel_buddy (out0->getKernel ());
00715         }
00716     }
00717 
00718     LaRankPattern tpattern (x_id, yi);
00719     LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;
00720 
00721     // ProcessNew with the "fresh" pattern
00722     float64_t time1 = CTime::get_curtime();
00723     process_return_t pro_ret = process (pattern, processNew);
00724     float64_t dual_increase = pro_ret.dual_increase;
00725     float64_t duration = (CTime::get_curtime() - time1);
00726     float64_t coeff = dual_increase / (0.00001 + duration);
00727     dual += dual_increase;
00728     n_pro++;
00729     w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;
00730 
00731     // ProcessOld & Optimize until ready for a new ProcessNew
00732     // (Adaptive schedule here)
00733     for (;;)
00734     {
00735         float64_t w_sum = w_pro + w_rep + w_opt;
00736         float64_t prop_min = w_sum / 20;
00737         if (w_pro < prop_min)
00738             w_pro = prop_min;
00739         if (w_rep < prop_min)
00740             w_rep = prop_min;
00741         if (w_opt < prop_min)
00742             w_opt = prop_min;
00743         w_sum = w_pro + w_rep + w_opt;
00744         float64_t r = CMath::random(0.0, w_sum);
00745         if (r <= w_pro)
00746         {
00747             break;
00748         }
00749         else if ((r > w_pro) && (r <= w_pro + w_rep))   // ProcessOld here
00750         {
00751             float64_t ltime1 = CTime::get_curtime ();
00752             float64_t ldual_increase = reprocess ();
00753             float64_t lduration = (CTime::get_curtime () - ltime1);
00754             float64_t lcoeff = ldual_increase / (0.00001 + lduration);
00755             dual += ldual_increase;
00756             n_rep++;
00757             w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
00758         }
00759         else            // Optimize here
00760         {
00761             float64_t ltime1 = CTime::get_curtime ();
00762             float64_t ldual_increase = optimize ();
00763             float64_t lduration = (CTime::get_curtime () - ltime1);
00764             float64_t lcoeff = ldual_increase / (0.00001 + lduration);
00765             dual += ldual_increase;
00766             n_opt++;
00767             w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
00768         }
00769     }
00770     if (nb_seen_examples % 100 == 0)    // periodically clean up useless support vectors/patterns
00771         nb_removed += cleanup ();
00772     return pro_ret.ypred;
00773 }
00774 
00775 // PREDICTION FUNCTION: main function in la_rank_classify
00776 int32_t CLaRank::predict (int32_t x_id)
00777 {
00778     int32_t res = -1;
00779     float64_t score_max = -DBL_MAX;
00780     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
00781     {
00782         float64_t score = it->second.computeScore (x_id);
00783         if (score > score_max)
00784         {
00785             score_max = score;
00786             res = it->first;
00787         }
00788     }
00789     return res;
00790 }
00791 
00792 void CLaRank::destroy ()
00793 {
00794     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
00795         it->second.destroy ();
00796     outputs.clear();
00797 }
00798 
00799 
00800 // Compute the duality gap (costly, but used as the stopping criterion in batch mode)
00801 float64_t CLaRank::computeGap ()
00802 {
00803     float64_t sum_sl = 0;
00804     float64_t sum_bi = 0;
00805     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00806     {
00807         const LaRankPattern & p = patterns[i];
00808         if (!p.exists ())
00809             continue;
00810         LaRankOutput *out = getOutput (p.y);
00811         if (!out)
00812             continue;
00813         sum_bi += out->getBeta (p.x_id);
00814         float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
00815         float64_t gmin = DBL_MAX;
00816         for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00817         {
00818             if (it->first != p.y && it->second.isSupportVector (p.x_id))
00819             {
00820                 float64_t g =
00821                     it->second.computeGradient (p.x_id, p.y, it->first);
00822                 if (g < gmin)
00823                     gmin = g;
00824             }
00825         }
00826         sum_sl += CMath::max (0.0, gi - gmin);
00827     }
00828     return CMath::max (0.0, computeW2 () + get_C() * sum_sl - sum_bi);
00829 }
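// In the loop above, gi = 1 - s_{y_i}(x_i) is the gradient of pattern i for its true
// class y_i, and gmin is the smallest gradient -s_y(x_i) over the other classes y for
// which x_i is currently a support vector (s_y is LaRankOutput::computeScore). The
// returned value is therefore
//     max(0, ||w||^2 + C * sum_i max(0, gi - gmin) - sum_i beta_{y_i}(x_i)),
// which train_machine() compares against C as the batch-mode stopping criterion.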
00830 
00831 // Number of classes seen so far
00832 uint32_t CLaRank::getNumOutputs () const
00833 {
00834     return outputs.size ();
00835 }
00836 
00837 // Number of Support Vectors
00838 int32_t CLaRank::getNSV ()
00839 {
00840     int32_t res = 0;
00841     for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
00842     {
00843         float32_t* sv=NULL;
00844         res += it->second.getSV (sv);
00845         SG_FREE(sv);
00846     }
00847     return res;
00848 }
00849 
00850 // Squared norm of the parameter vector
00851 float64_t CLaRank::computeW2 ()
00852 {
00853     float64_t res = 0;
00854     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00855     {
00856         const LaRankPattern & p = patterns[i];
00857         if (!p.exists ())
00858             continue;
00859         for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00860             if (it->second.getBeta (p.x_id))
00861                 res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
00862     }
00863     return res;
00864 }
00865 
00866 // Compute Dual objective value
00867 float64_t CLaRank::getDual ()
00868 {
00869     float64_t res = 0;
00870     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00871     {
00872         const LaRankPattern & p = patterns[i];
00873         if (!p.exists ())
00874             continue;
00875         LaRankOutput *out = getOutput (p.y);
00876         if (!out)
00877             continue;
00878         res += out->getBeta (p.x_id);
00879     }
00880     return res - computeW2 () / 2;
00881 }
00882 
00883 LaRankOutput *CLaRank::getOutput (int32_t index)
00884 {
00885     outputhash_t::iterator it = outputs.find (index);
00886     return it == outputs.end ()? NULL : &it->second;
00887 }
00888 
00889 // IMPORTANT Main SMO optimization step
00890 CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
00891 {
00892     process_return_t pro_ret = process_return_t (0, 0);
00893 
00894     /*
00895      ** compute gradient and sort
00896      */
00897     std::vector < outputgradient_t > outputgradients;
00898 
00899     outputgradients.reserve (getNumOutputs ());
00900 
00901     std::vector < outputgradient_t > outputscores;
00902     outputscores.reserve (getNumOutputs ());
00903 
00904     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00905     {
00906         if (ptype != processOptimize
00907                 || it->second.isSupportVector (pattern.x_id))
00908         {
00909             float64_t g =
00910                 it->second.computeGradient (pattern.x_id, pattern.y, it->first);
00911             outputgradients.push_back (outputgradient_t (it->first, g));
00912             if (it->first == pattern.y)
00913                 outputscores.push_back (outputgradient_t (it->first, (1 - g)));
00914             else
00915                 outputscores.push_back (outputgradient_t (it->first, -g));
00916         }
00917     }
00918 
00919     std::sort (outputgradients.begin (), outputgradients.end ());
00920 
00921     /*
00922      ** determine the prediction
00923      */
00924     std::sort (outputscores.begin (), outputscores.end ());
00925     pro_ret.ypred = outputscores[0].output;
00926 
00927     /*
00928      ** Find yp (1st part of the pair)
00929      */
00930     outputgradient_t ygp;
00931     LaRankOutput *outp = NULL;
00932     uint32_t p;
00933     for (p = 0; p < outputgradients.size (); ++p)
00934     {
00935         outputgradient_t & current = outputgradients[p];
00936         LaRankOutput *output = getOutput (current.output);
00937         bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
00938         bool goodclass = current.output == pattern.y;
00939         if ((!support && goodclass) ||
00940                 (support && output->getBeta (pattern.x_id) < (goodclass ? get_C() : 0)))
00941         {
00942             ygp = current;
00943             outp = output;
00944             break;
00945         }
00946     }
00947     if (p == outputgradients.size ())
00948         return pro_ret;
00949 
00950     /*
00951      ** Find ym (2nd part of the pair)
00952      */
00953     outputgradient_t ygm;
00954     LaRankOutput *outm = NULL;
00955     int32_t m;
00956     for (m = outputgradients.size () - 1; m >= 0; --m)
00957     {
00958         outputgradient_t & current = outputgradients[m];
00959         LaRankOutput *output = getOutput (current.output);
00960         bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
00961         bool goodclass = current.output == pattern.y;
00962         if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
00963         {
00964             ygm = current;
00965             outm = output;
00966             break;
00967         }
00968     }
00969     if (m < 0)
00970         return pro_ret;
00971 
00972     /*
00973      ** Throw or Insert pattern
00974      */
00975     if ((ygp.gradient - ygm.gradient) < tau)
00976         return pro_ret;
00977     if (ptype == processNew)
00978         patterns.insert (pattern);
00979 
00980     /*
00981      ** compute lambda and clip it
00982      */
00983     float64_t kii = outp->getKii (pattern.x_id);
00984     float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
00985     if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
00986     {
00987         float64_t beta = outp->getBeta (pattern.x_id);
00988         if (ygp.output == pattern.y)
00989             lambda = CMath::min (lambda, get_C() - beta);
00990         else
00991             lambda = CMath::min (lambda, fabs (beta));
00992     }
00993     else
00994         lambda = CMath::min (lambda, get_C());
00995 
00996     /*
00997      ** update the solution
00998      */
00999     outp->update (pattern.x_id, lambda, ygp.gradient);
01000     outm->update (pattern.x_id, -lambda, ygm.gradient);
01001 
01002     pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
01003     return pro_ret;
01004 }
01005 
01006 // ProcessOld
01007 float64_t CLaRank::reprocess ()
01008 {
01009     if (patterns.size ())
01010     {
01011         for (int32_t n = 0; n < 10; ++n)
01012         {
01013             process_return_t pro_ret = process (patterns.sample (), processOld);
01014             if (pro_ret.dual_increase)
01015                 return pro_ret.dual_increase;
01016         }
01017     }
01018     return 0;
01019 }
01020 
01021 // Optimize
01022 float64_t CLaRank::optimize ()
01023 {
01024     float64_t dual_increase = 0;
01025     if (patterns.size ())
01026     {
01027         for (int32_t n = 0; n < 10; ++n)
01028         {
01029             process_return_t pro_ret =
01030                 process (patterns.sample(), processOptimize);
01031             dual_increase += pro_ret.dual_increase;
01032         }
01033     }
01034     return dual_increase;
01035 }
01036 
01037 // remove patterns and return the number of patterns removed (the body below is commented out, so this currently always returns 0)
01038 uint32_t CLaRank::cleanup ()
01039 {
01040     /*
01041     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
01042         it->second.cleanup ();
01043 
01044     uint32_t res = 0;
01045     for (uint32_t i = 0; i < patterns.size (); ++i)
01046     {
01047         LaRankPattern & p = patterns[i];
01048         if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
01049         {
01050             patterns.remove (i);
01051             ++res;
01052         }
01053     }
01054     return res;
01055     */
01056     return 0;
01057 }
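
A minimal usage sketch for the class implemented above (not part of LaRank.cpp). It assumes the legacy Shogun C++ API that this file belongs to (CDenseFeatures, CGaussianKernel, CMulticlassLabels, init_shogun_with_defaults, apply_multiclass); header locations and exact signatures may differ between releases.

// Sketch only: trains CLaRank on a toy 2-D problem and prints its predictions.
// Adjust headers and calls to match your Shogun release.
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/LaRank.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy data: four 2-dimensional examples (one per column) from three classes
    SGMatrix<float64_t> X(2, 4);
    float64_t pts[] = {0, 0,  1, 1,  5, 5,  9, 9};
    for (int32_t i = 0; i < 8; i++)
        X.matrix[i] = pts[i];

    SGVector<float64_t> y(4);
    y[0] = 0; y[1] = 0; y[2] = 1; y[3] = 2;

    CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(X);
    CMulticlassLabels* labels = new CMulticlassLabels(y);
    CGaussianKernel* kernel = new CGaussianKernel(10, 2.0); // cache size (MB), kernel width

    CLaRank* machine = new CLaRank(1.0, kernel, labels);    // C, kernel, labels (see constructor above)
    machine->train(feats);                                   // dispatches to CLaRank::train_machine

    CMulticlassLabels* pred = machine->apply_multiclass(feats);
    for (int32_t i = 0; i < pred->get_num_labels(); i++)
        SG_SPRINT("example %d -> class %d\n", i, pred->get_int_label(i));

    SG_UNREF(pred);
    SG_UNREF(machine);
    exit_shogun();
    return 0;
}

The constructor mirrors CLaRank(float64_t C, CKernel* k, CLabels* lab) from the listing; train() ends up in train_machine(), which loops over the examples calling add() until the duality gap drops below C (or runs a single pass per example in online mode).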