LaRank.cpp

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 // Main functions of the LaRank algorithm for solving Multiclass SVM
00003 // Copyright (C) 2008- Antoine Bordes
00004 // Shogun specific adjustments (w) 2009 Soeren Sonnenburg
00005 
00006 // This library is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 2.1 of the License, or (at your option) any later version.
00010 // 
00011 // This program is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 // 
00016 // You should have received a copy of the GNU General Public License
00017 // along with this program; if not, write to the Free Software
00018 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
00019 //
00020 /***********************************************************************
00021  * 
00022  *  LUSH Lisp Universal Shell
00023  *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.
00024  *  Includes parts of TL3:
00025  *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.
00026  *  Includes selected parts of SN3.2:
00027  *    Copyright (C) 1991-2001 AT&T Corp.
00028  * 
00029  *  This program is free software; you can redistribute it and/or modify
00030  *  it under the terms of the GNU General Public License as published by
00031  *  the Free Software Foundation; either version 2 of the License, or
00032  *  (at your option) any later version.
00033  * 
00034  *  This program is distributed in the hope that it will be useful,
00035  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00036  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00037  *  GNU General Public License for more details.
00038  * 
00039  *  You should have received a copy of the GNU General Public License
00040  *  along with this program; if not, write to the Free Software
00041  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
00042  * 
00043  ***********************************************************************/
00044 
00045 /***********************************************************************
00046  * $Id: kcache.c,v 1.9 2007/01/25 22:42:09 leonb Exp $
00047  **********************************************************************/
00048 
00049 #include <vector>
00050 #include <algorithm>
00051 
00052 #include <shogun/io/SGIO.h>
00053 #include <shogun/lib/Signal.h>
00054 #include <shogun/lib/Time.h>
00055 #include <shogun/mathematics/Math.h>
00056 #include <shogun/classifier/svm/LaRank.h>
00057 #include <shogun/kernel/Kernel.h>
00058 
00059 using namespace shogun;
00060 
00061 namespace shogun
00062 {
00063     static larank_kcache_t* larank_kcache_create (CKernel* kernelfunc)
00064     {
          // Allocates a new kernel-row cache bound to kernelfunc. The cache starts
          // as its own "buddy" (a one-element circular ring) and with an empty,
          // zero-length index/row bookkeeping area (grown lazily by xminsize).
00065         larank_kcache_t *self;
00066         self = SG_CALLOC (larank_kcache_t, 1);
00067         self->l = 0;
00068         self->maxrowlen = 0;
00069         self->func = kernelfunc;
          // single-node circular buddy ring: self is its own prev and next
00070         self->prevbuddy = self;
00071         self->nextbuddy = self;
00072         self->cursize = sizeof (larank_kcache_t);
          // default budget: 256 MB (overridable via larank_kcache_set_maximum_size)
00073         self->maxsize = 256 * 1024 * 1024;
          // qprev/qnext hold one extra slot at index 0 so that rprev/rnext can be
          // legally indexed with -1, which serves as the LRU-list sentinel node
00074         self->qprev = SG_MALLOC(int32_t, 1);
00075         self->qnext = SG_MALLOC(int32_t, 1);
00076         self->rnext = self->qnext + 1;
00077         self->rprev = self->qprev + 1;
00078         self->rprev[-1] = -1;
00079         self->rnext[-1] = -1;
00080         return self;
00081     }
00082 
00083     static void xtruncate (larank_kcache_t * self, int32_t k, int32_t nlen)
00084     {
          // Shrinks cached row k down to nlen entries; nlen == 0 evicts the row
          // entirely and splices it out of the LRU list. Updates cursize with the
          // number of bytes released.
00085         int32_t olen = self->rsize[k];
00086         if (nlen < olen)
00087         {
00088             float32_t *ndata;
00089             float32_t *odata = self->rdata[k];
00090             if (nlen > 0)
00091             {
                  // keep only the first nlen entries of the old row
00092                 ndata = SG_MALLOC(float32_t, nlen);
00093                 memcpy (ndata, odata, nlen * sizeof (float32_t));
00094             }
00095             else
00096             {
                  // full eviction: drop the data and unlink k from the LRU list
                  // (a self-linked node marks "not in list")
00097                 ndata = 0;
00098                 self->rnext[self->rprev[k]] = self->rnext[k];
00099                 self->rprev[self->rnext[k]] = self->rprev[k];
00100                 self->rnext[k] = self->rprev[k] = k;
00101             }
00102             SG_FREE (odata);
00103             self->rdata[k] = ndata;
00104             self->rsize[k] = nlen;
              // nlen < olen, so this decreases cursize
00105             self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
00106         }
00107     }
00108 
00109     static void xpurge (larank_kcache_t * self)
00110     {
00111         if (self->cursize > self->maxsize)
00112         {
00113             int32_t k = self->rprev[-1];
00114             while (self->cursize > self->maxsize && k != self->rnext[-1])
00115             {
00116                 int32_t pk = self->rprev[k];
00117                 xtruncate (self, k, 0);
00118                 k = pk;
00119             }
00120         }
00121     }
00122 
00123     static void larank_kcache_set_maximum_size (larank_kcache_t * self, int64_t entries)
00124     {
          // Sets the cache budget in bytes and immediately evicts LRU rows if the
          // current contents no longer fit the new (possibly smaller) budget.
00125         ASSERT (self);
00126         ASSERT (entries > 0);
00127         self->maxsize = entries;
00128         xpurge (self);
00129     }
00130 
00131     static void larank_kcache_destroy (larank_kcache_t * self)
00132     {
          // Frees the cache and all of its rows. The cache is first unlinked from
          // its buddy ring so remaining buddies stay consistent; the struct is
          // zeroed before the final free as a use-after-free tripwire.
00133         if (self)
00134         {
00135             int32_t i;
              // splice self out of the circular buddy ring
00136             larank_kcache_t *nb = self->nextbuddy;
00137             larank_kcache_t *pb = self->prevbuddy;
00138             pb->nextbuddy = nb;
00139             nb->prevbuddy = pb;
00140             /* delete */
00141             if (self->i2r)
00142                 SG_FREE (self->i2r);
00143             if (self->r2i)
00144                 SG_FREE (self->r2i);
              // free each cached row before freeing the row-pointer array itself
00145             if (self->rdata)
00146                 for (i = 0; i < self->l; i++)
00147                     if (self->rdata[i])
00148                         SG_FREE (self->rdata[i]);
00149             if (self->rdata)
00150                 SG_FREE (self->rdata);
00151             if (self->rsize)
00152                 SG_FREE (self->rsize);
00153             if (self->rdiag)
00154                 SG_FREE (self->rdiag);
              // qnext/qprev are the real allocations behind rnext/rprev (offset by 1)
00155             if (self->qnext)
00156                 SG_FREE (self->qnext);
00157             if (self->qprev)
00158                 SG_FREE (self->qprev);
00159             memset (self, 0, sizeof (larank_kcache_t));
00160             SG_FREE (self);
00161         }
00162     }
00163 
00164     static void xminsize (larank_kcache_t * self, int32_t n)
00165     {
          // Ensures the cache bookkeeping covers at least n examples, growing all
          // index/row arrays by doubling (starting from at least 256). New slots
          // are initialized to the identity mapping with no cached data.
00166         int32_t ol = self->l;
00167         if (n > ol)
00168         {
00169             int32_t i;
00170             int32_t nl = CMath::max (256, ol);
00171             while (nl < n)
00172                 nl = nl + nl;
00173             self->i2r = SG_REALLOC (int32_t, self->i2r, nl);
00174             self->r2i = SG_REALLOC (int32_t, self->r2i, nl);
00175             self->rsize = SG_REALLOC (int32_t, self->rsize, nl);
              // one extra slot so index -1 (the LRU sentinel) stays valid
00176             self->qnext = SG_REALLOC (int32_t, self->qnext, (1 + nl));
00177             self->qprev = SG_REALLOC (int32_t, self->qprev, (1 + nl));
00178             self->rdiag = SG_REALLOC (float32_t, self->rdiag, nl);
00179             self->rdata = SG_REALLOC (float32_t*, self->rdata, nl);
              // re-derive the offset views after realloc may have moved the blocks
00180             self->rnext = self->qnext + 1;
00181             self->rprev = self->qprev + 1;
00182             for (i = ol; i < nl; i++)
00183             {
00184                 self->i2r[i] = i;
00185                 self->r2i[i] = i;
                  // rsize == -1 means "diagonal entry not yet computed"
00186                 self->rsize[i] = -1;
                  // self-linked: not in the LRU list
00187                 self->rnext[i] = i;
00188                 self->rprev[i] = i;
00189                 self->rdata[i] = 0;
00190             }
00191             self->l = nl;
00192         }
00193     }
00194 
00195     static int32_t* larank_kcache_r2i (larank_kcache_t * self, int32_t n)
00196     {
          // Returns the row-position -> example-index table, guaranteeing it
          // covers at least n entries. The pointer is owned by the cache and is
          // invalidated by any later growth (xminsize/realloc).
00197         xminsize (self, n);
00198         return self->r2i;
00199     }
00200 
00201     static void xextend (larank_kcache_t * self, int32_t k, int32_t nlen)
00202     {
00203         int32_t olen = self->rsize[k];
00204         if (nlen > olen)
00205         {
00206             float32_t *ndata = SG_MALLOC(float32_t, nlen);
00207             if (olen > 0)
00208             {
00209                 float32_t *odata = self->rdata[k];
00210                 memcpy (ndata, odata, olen * sizeof (float32_t));
00211                 SG_FREE (odata);
00212             }
00213             self->rdata[k] = ndata;
00214             self->rsize[k] = nlen;
00215             self->cursize += (int64_t) (nlen - olen) * sizeof (float32_t);
00216             self->maxrowlen = CMath::max (self->maxrowlen, nlen);
00217         }
00218     }
00219 
00220     static void xswap (larank_kcache_t * self, int32_t i1, int32_t i2, int32_t r1, int32_t r2)
00221     {
          // Swaps row positions r1 and r2 (belonging to examples i1 and i2) and
          // patches every cached row so that column order matches the new layout.
          // Columns that cannot be recovered from existing data are truncated away.
00222         /* swap row data */
00223         if (r1 < self->maxrowlen || r2 < self->maxrowlen)
00224         {
00225             int32_t mrl = 0;
              // walk the LRU list of live rows
00226             int32_t k = self->rnext[-1];
00227             while (k >= 0)
00228             {
00229                 int32_t nk = self->rnext[k];
00230                 int32_t n = self->rsize[k];
00231                 int32_t rr = self->i2r[k];
00232                 float32_t *d = self->rdata[k];
00233                 if (r1 < n)
00234                 {
00235                     if (r2 < n)
00236                     {
                          // both columns cached: plain swap
00237                         float32_t t1 = d[r1];
00238                         float32_t t2 = d[r2];
00239                         d[r1] = t2;
00240                         d[r2] = t1;
00241                     }
00242                     else if (rr == r2)
00243                     {
                          // row k is example i2 itself: K(k,i2) is its diagonal
00244                         d[r1] = self->rdiag[k];
00245                     }
00246                     else
00247                     {
                          // try to recover K(k,i2) from i2's own cached row
00248                         int32_t arsize = self->rsize[i2];
00249                         if (rr < arsize && rr != r1)
00250                             d[r1] = self->rdata[i2][rr];
00251                         else
00252                             xtruncate (self, k, r1);
00253                     }
00254                 }
00255                 else if (r2 < n)
00256                 {
00257                     if (rr == r1)
00258                     {
00259                         d[r2] = self->rdiag[k];
00260                     }
00261                     else
00262                     {
                          // symmetric case: recover K(k,i1) from i1's cached row
00263                         int32_t arsize = self->rsize[i1];
00264                         if (rr < arsize && rr != r2)
00265                             d[r2] = self->rdata[i1][rr];
00266                         else
00267                             xtruncate (self, k, r2);
00268                     }
00269                 }
00270                 mrl = CMath::max (mrl, self->rsize[k]);
00271                 k = nk;
00272             }
              // recompute after possible truncations above
00273             self->maxrowlen = mrl;
00274         }
00275         /* swap r2i and i2r */
00276         self->r2i[r1] = i2;
00277         self->r2i[r2] = i1;
00278         self->i2r[i1] = r2;
00279         self->i2r[i2] = r1;
00280     }
00281 
00282     static void larank_kcache_swap_rr (larank_kcache_t * self, int32_t r1, int32_t r2)
00283     {
          // Swaps two row positions, growing the cache first so both are valid.
00284         xminsize (self, 1 + CMath::max(r1, r2));
00285         xswap (self, self->r2i[r1], self->r2i[r2], r1, r2);
00286     }
00287 
00288     static void larank_kcache_swap_ri (larank_kcache_t * self, int32_t r1, int32_t i2)
00289     {
          // Swaps row position r1 with the row currently holding example i2.
00290         xminsize (self, 1 + CMath::max (r1, i2));
00291         xswap (self, self->r2i[r1], i2, r1, self->i2r[i2]);
00292     }
00293 
00294     static float64_t xquery (larank_kcache_t * self, int32_t i, int32_t j)
00295     {
          // Returns K(i,j), first scanning every cache in the buddy ring for a
          // cached value (row i, row j, or the diagonal) before falling back to
          // an actual kernel evaluation.
00296         /* search buddies */
00297         larank_kcache_t *cache = self->nextbuddy;
00298         do
00299         {
00300             int32_t l = cache->l;
00301             if (i < l && j < l)
00302             {
                  // is K(i,j) stored in row i at j's column position?
00303                 int32_t s = cache->rsize[i];
00304                 int32_t p = cache->i2r[j];
00305                 if (p < s)
00306                     return cache->rdata[i][p];
                  // diagonal entries are kept separately once computed (rsize >= 0)
00307                 if (i == j && s >= 0)
00308                     return cache->rdiag[i];
                  // symmetric lookup: K(j,i) in row j
00309                 p = cache->i2r[i];
00310                 s = cache->rsize[j];
00311                 if (p < s)
00312                     return cache->rdata[j][p];
00313             }
00314             cache = cache->nextbuddy;
00315         }
00316         while (cache != self);
00317         /* compute */
00318         return self->func->kernel(i, j);
00319     }
00320 
00321 
00322     static float64_t larank_kcache_query (larank_kcache_t * self, int32_t i, int32_t j)
00323     {
          // Argument-checked entry point for a single kernel value K(i,j);
          // delegates the cache lookup / computation to xquery.
00324         ASSERT (self);
00325         ASSERT (i >= 0);
00326         ASSERT (j >= 0);
00327         return xquery (self, i, j);
00328     }
00329 
00330 
00331     static void larank_kcache_set_buddy (larank_kcache_t * self, larank_kcache_t * buddy)
00332     {
          // Merges the buddy rings of self and buddy so their caches can answer
          // each other's lookups. Both rings must share the same kernel function;
          // merging an already-joined pair is a no-op.
00333         larank_kcache_t *p = self;
00334         larank_kcache_t *selflast = self->prevbuddy;
00335         larank_kcache_t *buddylast = buddy->prevbuddy;
00336         /* check functions are identical */
00337         ASSERT (self->func == buddy->func);
00338         /* make sure we are not already buddies */
00339         do
00340         {
00341             if (p == buddy)
00342                 return;
00343             p = p->nextbuddy;
00344         }
00345         while (p != self);
          // splice the two circular lists into one
00346         /* link */
00347         selflast->nextbuddy = buddy;
00348         buddy->prevbuddy = selflast;
00349         buddylast->nextbuddy = self;
00350         self->prevbuddy = buddylast;
00351     }
00352 
00353 
00354     static float32_t* larank_kcache_query_row (larank_kcache_t * self, int32_t i, int32_t len)
00355     {
          // Returns a pointer to row i of the kernel cache containing at least
          // len entries (entry p is K(r2i[p], i)), computing missing values on
          // demand. The row is promoted to most-recently-used. The returned
          // pointer is owned by the cache and may be invalidated by later calls.
00356         ASSERT (i >= 0);
00357         if (i < self->l && len <= self->rsize[i])
00358         {
              // fast path: row already long enough -- just unlink for re-insertion
00359             self->rnext[self->rprev[i]] = self->rnext[i];
00360             self->rprev[self->rnext[i]] = self->rprev[i];
00361         }
00362         else
00363         {
00364             int32_t olen, p;
00365             float32_t *d;
00366             if (i >= self->l || len >= self->l)
00367                 xminsize (self, CMath::max (1 + i, len));
00368             olen = self->rsize[i];
00369             if (olen < len)
00370             {
                  // rsize < 0 means even the diagonal is unknown yet
00371                 if (olen < 0)
00372                 {
00373                     self->rdiag[i] = self->func->kernel(i, i);
00374                     olen = self->rsize[i] = 0;
00375                 }
00376                 xextend (self, i, len);
00377                 d = self->rdata[i];
                  // temporarily restore olen so recursive cache queries below see
                  // only the valid prefix of this row
00378                 self->rsize[i] = olen;
00379                 for (p = olen; p < len; p++)
00380                     d[p] = larank_kcache_query (self, self->r2i[p], i);
00381                 self->rsize[i] = len;
00382             }
00383             self->rnext[self->rprev[i]] = self->rnext[i];
00384             self->rprev[self->rnext[i]] = self->rprev[i];
              // growing may have pushed us over budget; evict LRU rows
00385             xpurge (self);
00386         }
          // insert row i at the head of the LRU list (right after sentinel -1)
00387         self->rprev[i] = -1;
00388         self->rnext[i] = self->rnext[-1];
00389         self->rnext[self->rprev[i]] = i;
00390         self->rprev[self->rnext[i]] = i;
00391         return self->rdata[i];
00392     }
00393 
00394 }
00395 
00396 
00397 // Initializing an output class (basically creating a kernel cache for it)
00398 void LaRankOutput::initialize (CKernel* kfunc, int64_t cache)
00399 {
00400     kernel = larank_kcache_create (kfunc);
00401     larank_kcache_set_maximum_size (kernel, cache * 1024 * 1024);
00402     beta = SG_MALLOC(float32_t, 1);
00403     g = SG_MALLOC(float32_t, 1);
00404     *beta=0;
00405     *g=0;
00406     l = 0;
00407 }
00408 
00409 // Destroying an output class (basically destroying the kernel cache)
00410 void LaRankOutput::destroy ()
00411 {
00412     larank_kcache_destroy (kernel);
00413     kernel=NULL;
00414     SG_FREE(beta);
00415     SG_FREE(g);
00416     beta=NULL;
00417     g=NULL;
00418 }
00419 
00420 // !Important! Computing the score of a given input vector for the actual output
00421 float64_t LaRankOutput::computeScore (int32_t x_id)
00422 {
00423     if (l == 0)
00424         return 0;
00425     else
00426     {
00427         float32_t *row = larank_kcache_query_row (kernel, x_id, l);
00428         return CMath::dot (beta, row, l);
00429     }
00430 }
00431 
00432 // !Important! Computing the gradient of a given input vector for the actual output           
00433 float64_t LaRankOutput::computeGradient (int32_t xi_id, int32_t yi, int32_t ythis)
00434 {
00435     return (yi == ythis ? 1 : 0) - computeScore (xi_id);
00436 }
00437 
00438 // Updating the solution in the actual output
00439 void LaRankOutput::update (int32_t x_id, float64_t lambda, float64_t gp)
00440 {
      // Adds lambda to x_id's beta coefficient for this output (inserting x_id
      // as a new support vector with initial gradient gp if needed), then
      // updates all stored gradients for the changed coefficient.
00441     int32_t *r2i = larank_kcache_r2i (kernel, l);
      // sentinel l+1 (> any valid row) marks "x_id not yet a support vector"
00442     int64_t xr = l + 1;
00443     for (int32_t r = 0; r < l; r++)
00444         if (r2i[r] == x_id)
00445         {
00446             xr = r;
00447             break;
00448         }
00449 
00450     // updates the cache order and the beta coefficient
00451     if (xr < l)
00452     {
00453         beta[xr]+=lambda;
00454     }
00455     else
00456     {
          // new support vector: move x_id into row position l and grow arrays
00457         larank_kcache_swap_ri (kernel, l, x_id);
00458         CMath::resize(g, l, l+1);
00459         CMath::resize(beta, l, l+1);
00460         g[l]=gp;
00461         beta[l]=lambda;
00462         l++;
00463     }
00464 
00465     // update stored gradients
00466     float32_t *row = larank_kcache_query_row (kernel, x_id, l);
00467     for (int32_t r = 0; r < l; r++)
00468     {
00469         float64_t oldg = g[r];
00470         g[r]=oldg - lambda * row[r];
00471     }
00472 }
00473 
00474 // Linking the cache of this output to the cache of another "buddy" output
00475 // so that if a requested value is not found in this cache, you can ask your buddy if it has it.
00476 void LaRankOutput::set_kernel_buddy (larank_kcache_t * bud)
00477 {
      // Joins this output's kernel cache into bud's buddy ring so a cache miss
      // here can be served from the buddy's cached rows.
00478     larank_kcache_set_buddy (bud, kernel);
00479 }
00480 
00481 // Removing useless support vectors (for which beta=0)                
00482 int32_t LaRankOutput::cleanup ()
00483 {
      // Removes support vectors whose beta is (numerically) zero, compacting
      // beta/g and the cache row order. Returns how many were removed.
00484     int32_t count = 0;
00485     std::vector < int32_t >idx;
      // collect positions with |beta| < FLT_EPSILON
00486     for (int32_t x = 0; x < l; x++)
00487     {
00488         if ((beta[x] < FLT_EPSILON) && (beta[x] > -FLT_EPSILON))
00489         {
00490             idx.push_back (x);
00491             count++;
00492         }
00493     }
00494     int32_t new_l = l - count;
00495     for (int32_t xx = 0; xx < count; xx++)
00496     {
          // idx[xx] - xx corrects for the xx elements already shifted out
00497         int32_t i = idx[xx] - xx;
          // bubble the dead entry to the end, shifting everything after it left
00498         for (int32_t r = i; r < (l - 1); r++)
00499         {
00500             larank_kcache_swap_rr (kernel, r, int64_t(r) + 1);
00501             beta[r]=beta[r + 1];
00502             g[r]=g[r + 1];
00503         }
00504     }
00505     CMath::resize(beta, l, new_l+1);
00506     CMath::resize(g, l, new_l+1);
00507     beta[new_l]=0;
00508     g[new_l]=0;
00509     l = new_l;
00510     return count;
00511 }
00512 
00513 // --- Below are information or "get" functions --- //
00514 //
00515 float64_t LaRankOutput::getW2 ()
00516 {
00517     float64_t sum = 0;
00518     int32_t *r2i = larank_kcache_r2i (kernel, l + 1);
00519     for (int32_t r = 0; r < l; r++)
00520     {
00521         float32_t *row_r = larank_kcache_query_row (kernel, r2i[r], l);
00522         sum += beta[r] * CMath::dot (beta, row_r, l);
00523     }
00524     return sum;
00525 }
00526 
00527 float64_t LaRankOutput::getKii (int32_t x_id)
00528 {
      // Diagonal kernel value K(x_id, x_id), via the (buddy-aware) cache.
00529     return larank_kcache_query (kernel, x_id, x_id);
00530 }
00531 
00532 //
00533 float64_t LaRankOutput::getBeta (int32_t x_id)
00534 {
00535     int32_t *r2i = larank_kcache_r2i (kernel, l);
00536     int32_t xr = -1;
00537     for (int32_t r = 0; r < l; r++)
00538         if (r2i[r] == x_id)
00539         {
00540             xr = r;
00541             break;
00542         }
00543     return (xr < 0 ? 0 : beta[xr]);
00544 }
00545 
00546 //
00547 float64_t LaRankOutput::getGradient (int32_t x_id)
00548 {
00549     int32_t *r2i = larank_kcache_r2i (kernel, l);
00550     int32_t xr = -1;
00551     for (int32_t r = 0; r < l; r++)
00552         if (r2i[r] == x_id)
00553         {
00554             xr = r;
00555             break;
00556         }
00557     return (xr < 0 ? 0 : g[xr]);
00558 }
00559 bool LaRankOutput::isSupportVector (int32_t x_id) const
00560 {
00561     int32_t *r2i = larank_kcache_r2i (kernel, l);
00562     int32_t xr = -1;
00563     for (int32_t r = 0; r < l; r++)
00564         if (r2i[r] == x_id)
00565         {
00566             xr = r;
00567             break;
00568         }
00569     return (xr >= 0);
00570 }
00571 
00572 //
00573 int32_t LaRankOutput::getSV (float32_t* &sv) const
00574 {
      // Fills sv with this output's support-vector example indices and returns
      // their count l. The caller owns (and must SG_FREE) the allocated array.
      // NOTE(review): indices are stored as float32_t -- exact only up to 2^24;
      // presumably fine for realistic training-set sizes, but worth confirming.
00575     sv=SG_MALLOC(float32_t, l);
00576     int32_t *r2i = larank_kcache_r2i (kernel, l);
00577     for (int32_t r = 0; r < l; r++)
00578         sv[r]=r2i[r];
00579     return l;
00580 }
00581 
// Default constructor: one-vs-rest multiclass SVM with all adaptive-schedule
// weights (w_pro/w_rep/w_opt) starting at 1 and counters at zero; batch mode.
00582 CLaRank::CLaRank (): CMultiClassSVM(ONE_VS_REST), 
00583     nb_seen_examples (0), nb_removed (0),
00584     n_pro (0), n_rep (0), n_opt (0),
00585     w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
00586     batch_mode(true), step(0)
00587 {
00588 }
00589 
// Constructor with regularization constant C, kernel and labels; otherwise
// identical initialization to the default constructor.
00590 CLaRank::CLaRank (float64_t C, CKernel* k, CLabels* lab):
00591     CMultiClassSVM(ONE_VS_REST, C, k, lab), 
00592     nb_seen_examples (0), nb_removed (0),
00593     n_pro (0), n_rep (0), n_opt (0),
00594     w_pro (1), w_rep (1), w_opt (1), y0 (0), dual (0),
00595     batch_mode(true), step(0)
00596 {
00597 }
00598 
00599 CLaRank::~CLaRank ()
00600 {
      // Releases all per-class outputs (kernel caches, beta/gradient arrays).
00601     destroy();
00602 }
00603 
00604 bool CLaRank::train_machine(CFeatures* data)
00605 {
00606     tau = 0.0001;
00607 
00608     ASSERT(kernel);
00609     ASSERT(labels && labels->get_num_labels());
00610 
00611     CSignal::clear_cancel();
00612 
00613     if (data)
00614     {
00615         if (data->get_num_vectors() != labels->get_num_labels())
00616         {
00617             SG_ERROR("Numbert of vectors (%d) does not match number of labels (%d)\n",
00618                     data->get_num_vectors(), labels->get_num_labels());
00619         }
00620         kernel->init(data, data);
00621     }
00622 
00623     ASSERT(kernel->get_num_vec_lhs() && kernel->get_num_vec_rhs());
00624 
00625     nb_train=labels->get_num_labels();
00626     cache = kernel->get_cache_size();
00627 
00628     int32_t n_it = 1;
00629     float64_t gap = DBL_MAX;
00630 
00631     SG_INFO("Training on %d examples\n", nb_train);
00632     while (gap > C1 && (!CSignal::cancel_computations()))      // stopping criteria
00633     {
00634         float64_t tr_err = 0;
00635         int32_t ind = step;
00636         for (int32_t i = 0; i < nb_train; i++)
00637         {
00638             int32_t y=labels->get_label(i);
00639             if (add (i, y) != y)   // call the add function
00640                 tr_err++;
00641 
00642             if (ind && i / ind)
00643             {
00644                 SG_DEBUG("Done: %d %% Train error (online): %f%%\n",
00645                         (int32_t) (((float64_t) i) / nb_train * 100), (tr_err / ((float64_t) i + 1)) * 100);
00646                 ind += step;
00647             }
00648         }
00649 
00650         SG_DEBUG("End of iteration %d\n", n_it++);
00651         SG_DEBUG("Train error (online): %f%%\n", (tr_err / nb_train) * 100);
00652         gap = computeGap ();
00653         SG_ABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(DBL_MAX), -CMath::log10(C1), 6);
00654 
00655         if (!batch_mode)        // skip stopping criteria if online mode
00656             gap = 0;
00657     }
00658     SG_DONE();
00659 
00660     int32_t num_classes = outputs.size();
00661     create_multiclass_svm(num_classes);
00662     SG_DEBUG("%d classes\n", num_classes);
00663 
00664     // Used for saving a model file
00665     int32_t i=0;
00666     for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
00667     {
00668         const LaRankOutput* o=&(it->second);
00669 
00670         larank_kcache_t* k=o->getKernel();
00671         int32_t l=o->get_l();
00672         float32_t* beta=o->getBetas();
00673         int32_t *r2i = larank_kcache_r2i (k, l);
00674 
00675         ASSERT(l>0);
00676         SG_DEBUG("svm[%d] has %d sv, b=%f\n", i, l, 0.0);
00677 
00678         CSVM* svm=new CSVM(l);
00679 
00680         for (int32_t j=0; j<l; j++)
00681         {
00682             svm->set_alpha(j, beta[j]);
00683             svm->set_support_vector(j, r2i[j]);
00684         }
00685 
00686         svm->set_bias(0);
00687         set_svm(i, svm);
00688         i++;
00689     }
00690     destroy();
00691 
00692     return true;
00693 }
00694 
00695 // LEARNING FUNCTION: add new patterns and run optimization steps selected with adaptive schedule
00696 int32_t CLaRank::add (int32_t x_id, int32_t yi)
00697 {
      // Online learning step: runs ProcessNew on (x_id, yi), then interleaves
      // ProcessOld (reprocess) and Optimize steps chosen randomly in proportion
      // to their measured dual-increase-per-second weights. Returns the label
      // predicted for x_id during the ProcessNew step.
00698     ++nb_seen_examples;
00699     // create a new output object if this one has never been seen before 
00700     if (!getOutput (yi))
00701     {
00702         outputs.insert (std::make_pair (yi, LaRankOutput ()));
00703         LaRankOutput *cur = getOutput (yi);
00704         cur->initialize (kernel, cache);
          // remember the first class seen; its cache anchors the buddy ring
00705         if (outputs.size () == 1)
00706             y0 = outputs.begin ()->first;
00707         // link the cache of this new output to a buddy 
00708         if (outputs.size () > 1)
00709         {
00710             LaRankOutput *out0 = getOutput (y0);
00711             cur->set_kernel_buddy (out0->getKernel ());
00712         }
00713     }
00714 
00715     LaRankPattern tpattern (x_id, yi);
00716     LaRankPattern & pattern = (patterns.isPattern (x_id)) ? patterns.getPattern (x_id) : tpattern;
00717 
00718     // ProcessNew with the "fresh" pattern
00719     float64_t time1 = CTime::get_curtime();
00720     process_return_t pro_ret = process (pattern, processNew);
00721     float64_t dual_increase = pro_ret.dual_increase;
00722     float64_t duration = (CTime::get_curtime() - time1);
      // efficiency of this step: dual increase per second (epsilon-guarded)
00723     float64_t coeff = dual_increase / (0.00001 + duration);
00724     dual += dual_increase;
00725     n_pro++;
      // exponential moving average of the ProcessNew efficiency
00726     w_pro = 0.05 * coeff + (1 - 0.05) * w_pro;
00727 
00728     // ProcessOld & Optimize until ready for a new processnew
00729     // (Adaptative schedule here)
00730     for (;;)
00731     {
          // keep every weight at >= 5% of the total so no step starves
00732         float64_t w_sum = w_pro + w_rep + w_opt;
00733         float64_t prop_min = w_sum / 20;
00734         if (w_pro < prop_min)
00735             w_pro = prop_min;
00736         if (w_rep < prop_min)
00737             w_rep = prop_min;
00738         if (w_opt < prop_min)
00739             w_opt = prop_min;
00740         w_sum = w_pro + w_rep + w_opt;
          // roulette-wheel selection among ProcessNew / ProcessOld / Optimize
00741         float64_t r = CMath::random(0.0, w_sum);
00742         if (r <= w_pro)
00743         {
00744             break;
00745         }
00746         else if ((r > w_pro) && (r <= w_pro + w_rep))   // ProcessOld here
00747         {
00748             float64_t ltime1 = CTime::get_curtime ();
00749             float64_t ldual_increase = reprocess ();
00750             float64_t lduration = (CTime::get_curtime () - ltime1);
00751             float64_t lcoeff = ldual_increase / (0.00001 + lduration);
00752             dual += ldual_increase;
00753             n_rep++;
00754             w_rep = 0.05 * lcoeff + (1 - 0.05) * w_rep;
00755         }
00756         else            // Optimize here 
00757         {
00758             float64_t ltime1 = CTime::get_curtime ();
00759             float64_t ldual_increase = optimize ();
00760             float64_t lduration = (CTime::get_curtime () - ltime1);
00761             float64_t lcoeff = ldual_increase / (0.00001 + lduration);
00762             dual += ldual_increase;
00763             n_opt++;
00764             w_opt = 0.05 * lcoeff + (1 - 0.05) * w_opt;
00765         }
00766     }
00767     if (nb_seen_examples % 100 == 0)    // Cleanup useless Support Vectors/Patterns sometimes
00768         nb_removed += cleanup ();
00769     return pro_ret.ypred;
00770 }
00771 
00772 // PREDICTION FUNCTION: main function in la_rank_classify
00773 int32_t CLaRank::predict (int32_t x_id)
00774 {
00775     int32_t res = -1;
00776     float64_t score_max = -DBL_MAX;
00777     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
00778     {
00779         float64_t score = it->second.computeScore (x_id);
00780         if (score > score_max)
00781         {
00782             score_max = score;
00783             res = it->first;
00784         }
00785     }
00786     return res;
00787 }
00788 
00789 void CLaRank::destroy ()
00790 {
      // Tears down every per-class output (kernel cache + coefficient arrays).
00791     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end ();++it)
00792         it->second.destroy ();
00793 }
00794 
00795 
00796 // Compute Duality gap (costly but used in stopping criteria in batch mode)                     
00797 float64_t CLaRank::computeGap ()
00798 {
      // Duality gap used as the batch-mode stopping criterion:
      // max(0, W2 + C1 * sum of slacks - sum of betas).
00799     float64_t sum_sl = 0;
00800     float64_t sum_bi = 0;
00801     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00802     {
00803         const LaRankPattern & p = patterns[i];
00804         if (!p.exists ())
00805             continue;
00806         LaRankOutput *out = getOutput (p.y);
00807         if (!out)
00808             continue;
00809         sum_bi += out->getBeta (p.x_id);
          // gradient on the true class vs. the smallest gradient among the
          // competing classes where p is a support vector
00810         float64_t gi = out->computeGradient (p.x_id, p.y, p.y);
00811         float64_t gmin = DBL_MAX;
00812         for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00813         {
00814             if (it->first != p.y && it->second.isSupportVector (p.x_id))
00815             {
00816                 float64_t g =
00817                     it->second.computeGradient (p.x_id, p.y, it->first);
00818                 if (g < gmin)
00819                     gmin = g;
00820             }
00821         }
00822         sum_sl += CMath::max (0.0, gi - gmin);
00823     }
00824     return CMath::max (0.0, computeW2 () + C1 * sum_sl - sum_bi);
00825 }
00826 
00827 // Number of classes so far
00828 uint32_t CLaRank::getNumOutputs () const
00829 {
      // One output per class seen so far.
00830     return outputs.size ();
00831 }
00832 
00833 // Number of Support Vectors
00834 int32_t CLaRank::getNSV ()
00835 {
      // Total number of support vectors over all class outputs. The index
      // arrays returned by getSV are allocated per output and freed here.
00836     int32_t res = 0;
00837     for (outputhash_t::const_iterator it = outputs.begin (); it != outputs.end (); ++it)
00838     {
00839         float32_t* sv=NULL;
00840         res += it->second.getSV (sv);
00841         SG_FREE(sv);
00842     }
00843     return res;
00844 }
00845 
00846 // Norm of the parameters vector
00847 float64_t CLaRank::computeW2 ()
00848 {
00849     float64_t res = 0;
00850     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00851     {
00852         const LaRankPattern & p = patterns[i];
00853         if (!p.exists ())
00854             continue;
00855         for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00856             if (it->second.getBeta (p.x_id))
00857                 res += it->second.getBeta (p.x_id) * it->second.computeScore (p.x_id);
00858     }
00859     return res;
00860 }
00861 
00862 // Compute Dual objective value
00863 float64_t CLaRank::getDual ()
00864 {
00865     float64_t res = 0;
00866     for (uint32_t i = 0; i < patterns.maxcount (); ++i)
00867     {
00868         const LaRankPattern & p = patterns[i];
00869         if (!p.exists ())
00870             continue;
00871         LaRankOutput *out = getOutput (p.y);
00872         if (!out)
00873             continue;
00874         res += out->getBeta (p.x_id);
00875     }
00876     return res - computeW2 () / 2;
00877 }
00878 
00879 LaRankOutput *CLaRank::getOutput (int32_t index)
00880 {
00881     outputhash_t::iterator it = outputs.find (index);
00882     return it == outputs.end ()? NULL : &it->second;
00883 }
00884 
00885 // IMPORTANT Main SMO optimization step
00886 CLaRank::process_return_t CLaRank::process (const LaRankPattern & pattern, process_type ptype)
00887 {
00888     process_return_t pro_ret = process_return_t (0, 0);
00889 
00890     /*
00891      ** compute gradient and sort   
00892      */
00893     std::vector < outputgradient_t > outputgradients;
00894 
00895     outputgradients.reserve (getNumOutputs ());
00896 
00897     std::vector < outputgradient_t > outputscores;
00898     outputscores.reserve (getNumOutputs ());
00899 
00900     for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
00901     {
00902         if (ptype != processOptimize
00903                 || it->second.isSupportVector (pattern.x_id))
00904         {
00905             float64_t g =
00906                 it->second.computeGradient (pattern.x_id, pattern.y, it->first);
00907             outputgradients.push_back (outputgradient_t (it->first, g));
00908             if (it->first == pattern.y)
00909                 outputscores.push_back (outputgradient_t (it->first, (1 - g)));
00910             else
00911                 outputscores.push_back (outputgradient_t (it->first, -g));
00912         }
00913     }
00914 
00915     std::sort (outputgradients.begin (), outputgradients.end ());
00916 
00917     /*
00918      ** determine the prediction
00919      */
00920     std::sort (outputscores.begin (), outputscores.end ());
00921     pro_ret.ypred = outputscores[0].output;
00922 
00923     /*
00924      ** Find yp (1st part of the pair)
00925      */
00926     outputgradient_t ygp;
00927     LaRankOutput *outp = NULL;
00928     uint32_t p;
00929     for (p = 0; p < outputgradients.size (); ++p)
00930     {
00931         outputgradient_t & current = outputgradients[p];
00932         LaRankOutput *output = getOutput (current.output);
00933         bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
00934         bool goodclass = current.output == pattern.y;
00935         if ((!support && goodclass) ||
00936                 (support && output->getBeta (pattern.x_id) < (goodclass ? C1 : 0)))
00937         {
00938             ygp = current;
00939             outp = output;
00940             break;
00941         }
00942     }
00943     if (p == outputgradients.size ())
00944         return pro_ret;
00945 
00946     /*
00947      ** Find ym (2nd part of the pair)
00948      */
00949     outputgradient_t ygm;
00950     LaRankOutput *outm = NULL;
00951     int32_t m;
00952     for (m = outputgradients.size () - 1; m >= 0; --m)
00953     {
00954         outputgradient_t & current = outputgradients[m];
00955         LaRankOutput *output = getOutput (current.output);
00956         bool support = ptype == processOptimize || output->isSupportVector (pattern.x_id);
00957         bool goodclass = current.output == pattern.y;
00958         if (!goodclass || (support && output->getBeta (pattern.x_id) > 0))
00959         {
00960             ygm = current;
00961             outm = output;
00962             break;
00963         }
00964     }
00965     if (m < 0)
00966         return pro_ret;
00967 
00968     /*
00969      ** Throw or Insert pattern
00970      */
00971     if ((ygp.gradient - ygm.gradient) < tau)
00972         return pro_ret;
00973     if (ptype == processNew)
00974         patterns.insert (pattern);
00975 
00976     /*
00977      ** compute lambda and clip it
00978      */
00979     float64_t kii = outp->getKii (pattern.x_id);
00980     float64_t lambda = (ygp.gradient - ygm.gradient) / (2 * kii);
00981     if (ptype == processOptimize || outp->isSupportVector (pattern.x_id))
00982     {
00983         float64_t beta = outp->getBeta (pattern.x_id);
00984         if (ygp.output == pattern.y)
00985             lambda = CMath::min (lambda, C1 - beta);
00986         else
00987             lambda = CMath::min (lambda, fabs (beta));
00988     }
00989     else
00990         lambda = CMath::min (lambda, C1);
00991 
00992     /*
00993      ** update the solution
00994      */
00995     outp->update (pattern.x_id, lambda, ygp.gradient);
00996     outm->update (pattern.x_id, -lambda, ygm.gradient);
00997 
00998     pro_ret.dual_increase = lambda * ((ygp.gradient - ygm.gradient) - lambda * kii);
00999     return pro_ret;
01000 }
01001 
01002 // ProcessOld
01003 float64_t CLaRank::reprocess ()
01004 {
01005     if (patterns.size ())
01006     {
01007         for (int32_t n = 0; n < 10; ++n)
01008         {
01009             process_return_t pro_ret = process (patterns.sample (), processOld);
01010             if (pro_ret.dual_increase)
01011                 return pro_ret.dual_increase;
01012         }
01013     }
01014     return 0;
01015 }
01016 
01017 // Optimize
01018 float64_t CLaRank::optimize ()
01019 {
01020     float64_t dual_increase = 0;
01021     if (patterns.size ())
01022     {
01023         for (int32_t n = 0; n < 10; ++n)
01024         {
01025             process_return_t pro_ret =
01026                 process (patterns.sample(), processOptimize);
01027             dual_increase += pro_ret.dual_increase;
01028         }
01029     }
01030     return dual_increase;
01031 }
01032 
// remove patterns and return the number of patterns that were removed
//
// NOTE(review): the actual cleanup logic below is deliberately disabled
// (kept commented out); the function currently performs no work and
// always reports that 0 patterns were removed. The commented code would
// ask every output to clean up, then drop stored patterns that are no
// longer a support vector of their true-class output.
uint32_t CLaRank::cleanup ()
{
    /*
    for (outputhash_t::iterator it = outputs.begin (); it != outputs.end (); ++it)
        it->second.cleanup ();

    uint32_t res = 0;
    for (uint32_t i = 0; i < patterns.size (); ++i)
    {
        LaRankPattern & p = patterns[i];
        if (p.exists () && !outputs[p.y].isSupportVector (p.x_id))
        {
            patterns.remove (i);
            ++res;
        }
    }
    return res;
    */
    return 0;
}
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation