en/latest/LaRank_8h_source.html

 // -*- C++ -*-

 // Main functions of the LaRank algorithm for solving Multiclass SVM

 // Copyright (C) 2008- Antoine Bordes

 // Shogun specific adjustments (w) 2009 Soeren Sonnenburg


 // This library is free software; you can redistribute it and/or

 // modify it under the terms of the GNU Lesser General Public

 // License as published by the Free Software Foundation; either

 // version 2.1 of the License, or (at your option) any later version.

 //

 // This program is distributed in the hope that it will be useful,

 // but WITHOUT ANY WARRANTY; without even the implied warranty of

 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 // GNU General Public License for more details.

 //

 // You should have received a copy of the GNU Lesser General Public

 // License along with this library; if not, write to the Free Software

 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 //

 /***********************************************************************

  *

  *  LUSH Lisp Universal Shell

  *    Copyright (C) 2002 Leon Bottou, Yann Le Cun, AT&T Corp, NECI.

  *  Includes parts of TL3:

  *    Copyright (C) 1987-1999 Leon Bottou and Neuristique.

  *  Includes selected parts of SN3.2:

  *    Copyright (C) 1991-2001 AT&T Corp.

  *

  *  This program is free software; you can redistribute it and/or modify

  *  it under the terms of the GNU General Public License as published by

  *  the Free Software Foundation; either version 2 of the License, or

  *  (at your option) any later version.

  *

  *  This program is distributed in the hope that it will be useful,

  *  but WITHOUT ANY WARRANTY; without even the implied warranty of

  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

  *  GNU General Public License for more details.

  *

  *  You should have received a copy of the GNU General Public License

  *  along with this program; if not, write to the Free Software

  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA

  *

  ***********************************************************************/


 /***********************************************************************

  * $Id: kcache.h,v 1.8 2007/01/25 22:42:09 leonb Exp $

  **********************************************************************/


 #ifndef LARANK_H

 #define LARANK_H


 #include <vector>

 #include <set>

 #include <map>

 #define STDEXT_NAMESPACE __gnu_cxx

 #define std_hash_map std::map

 #define std_hash_set std::set


 #include <shogun/lib/config.h>


 #include <shogun/io/SGIO.h>

 #include <shogun/kernel/Kernel.h>

 #include <shogun/multiclass/MulticlassSVM.h>


 namespace shogun

 {

 #ifndef DOXYGEN_SHOULD_SKIP_THIS

     // Kernel cache record. Declared ahead of its definition so the struct can
     // hold pointers to its own type through the larank_kcache_t alias.
     // LaRankOutput keeps one of these as its "cache for kernel values".
     // NOTE(review): the field notes below are inferred from names only; the
     // code that maintains these fields is not in this header -- confirm there.
     struct larank_kcache_s;

     typedef struct larank_kcache_s larank_kcache_t;

     struct larank_kcache_s

     {

         CKernel* func;  // kernel object associated with this cache

         larank_kcache_t *prevbuddy;  // link to another ("buddy") cache -- presumably the previous one in a chain; confirm

         larank_kcache_t *nextbuddy;  // link to another ("buddy") cache -- presumably the next one in a chain; confirm

         int64_t maxsize;  // presumably the size budget of the cache (units not visible here) -- confirm

         int64_t cursize;  // presumably the amount of that budget currently in use -- confirm

         int32_t l;  // NOTE(review): presumably the number of indices tracked by i2r/r2i -- confirm

         int32_t *i2r;  // presumably an index -> row lookup table -- confirm

         int32_t *r2i;  // presumably the inverse row -> index lookup table -- confirm

         int32_t maxrowlen;  // presumably an upper bound on the length of a cached row -- confirm

         /* Rows */

         int32_t *rsize;  // per-row array; presumably the cached length of each row -- confirm

         float32_t *rdiag;  // per-row array; presumably the diagonal kernel value of each row -- confirm

         float32_t **rdata;  // per-row array of pointers; presumably the cached kernel values -- confirm

         int32_t *rnext;  // presumably "next" links of a list threaded through the rows -- confirm

         int32_t *rprev;  // presumably "prev" links of that same list -- confirm

         int32_t *qnext;  // presumably "next" links of a second list over the rows -- confirm

         int32_t *qprev;  // presumably "prev" links of that second list -- confirm

     };


     /*

      ** OUTPUT: one per class of the training set, keeps track of support

      * vectors and their beta coefficients

      */

     // One LaRankOutput exists per class. It stores the beta coefficients and
     // gradients of that class together with a pointer to its kernel cache.
     //
     // The destructor calls destroy() while copying is left to the compiler,
     // so a copy shares the same raw pointers as its source.
     // NOTE(review): check in the implementation file that copies are only
     // taken of outputs that have not been initialized yet.
     class LaRankOutput
     {
         public:
             // A fresh output holds no coefficients, no gradients, no cache
             // and reports zero support vectors.
             LaRankOutput ()
                 : beta (NULL), g (NULL), kernel (NULL), l (0)
             {
             }

             // Tears the output down through destroy().
             virtual ~LaRankOutput ()
             {
                 destroy ();
             }

             // Sets up the output for use; basically creates its kernel cache.
             void initialize (CKernel* kfunc, int64_t cache);

             // Counterpart of initialize(); basically destroys the kernel cache.
             void destroy ();

             // !Important! Score of the input vector x_id for this output.
             float64_t computeScore (int32_t x_id);

             // !Important! Gradient of the input vector xi_id for this output.
             float64_t computeGradient (int32_t xi_id, int32_t yi, int32_t ythis);

             // Updates the solution held by this output.
             void update (int32_t x_id, float64_t lambda, float64_t gp);

             // Links this output's cache to the cache of a "buddy" output, so
             // that a value missing from this cache can be requested from the
             // buddy instead.
             void set_kernel_buddy (larank_kcache_t * bud);

             // Drops useless support vectors, i.e. those with beta=0.
             int32_t cleanup ();

             // --- Information / "get" functions --- //

             // Kernel cache pointer of this output (NULL after construction).
             larank_kcache_t *getKernel () const
             {
                 return kernel;
             }

             // Current value of the support vector counter l.
             int32_t get_l () const
             {
                 return l;
             }

             // NOTE(review): undocumented; the name suggests the squared norm
             // of this output's weights -- confirm in the implementation.
             float64_t getW2 ();

             // NOTE(review): undocumented; the name suggests the kernel value
             // K(x_id, x_id) -- confirm in the implementation.
             float64_t getKii (int32_t x_id);

             // NOTE(review): undocumented; presumably the beta coefficient
             // associated with x_id -- confirm in the implementation.
             float64_t getBeta (int32_t x_id);

             // Raw pointer to the beta array (NULL after construction). The
             // pointee is mutable even though the accessor is const.
             float32_t* getBetas () const
             {
                 return beta;
             }

             // NOTE(review): undocumented; presumably the stored gradient
             // associated with x_id -- confirm in the implementation.
             float64_t getGradient (int32_t x_id);

             // NOTE(review): undocumented; presumably tells whether x_id is a
             // support vector of this output -- confirm in the implementation.
             bool isSupportVector (int32_t x_id) const;

             // NOTE(review): undocumented; sv is passed by reference to a
             // pointer, so it is presumably set to an array by the call, with
             // the element count as return value -- confirm, including who
             // owns the array.
             int32_t getSV (float32_t* &sv) const;

         private:
             // The LaRank solution for this class lives in these members.
             float32_t* beta;            // beta coefficients
             float32_t* g;               // stored gradient derivatives
             larank_kcache_t *kernel;    // cache for kernel values
             int32_t l;                  // number of support vectors
     };


     /*

      ** LARANKPATTERN: to keep track of the support patterns

      */

     // A support pattern: the index of an example (x_id) and its label (y).
     //
     // A negative x_id marks a cleared pattern; see exists() and clear().
     class LaRankPattern
     {
         public:
             // Pattern for example x_index carrying the given label.
             LaRankPattern (int32_t x_index, int32_t label)
                 : x_id (x_index), y (label) {}

             // Default pattern: example 0 with label 0. The label is set
             // explicitly so that reading y never touches an indeterminate
             // value. Because x_id is 0, exists() is true for such a pattern.
             LaRankPattern ()
                 : x_id (0), y (0) {}

             // True unless the pattern has been cleared (x_id negative).
             bool exists () const
             {
                 return x_id >= 0;
             }

             // Marks the pattern as cleared; the label is left untouched.
             void clear ()
             {
                 x_id = -1;
             }

             int32_t x_id;   // example index, -1 once cleared
             int32_t y;      // label
     };


     /*

      **  LARANKPATTERNS: the collection of support patterns

      */

     class LaRankPatterns

     {

         public:

             LaRankPatterns () {}

             ~LaRankPatterns () {}


             void insert (const LaRankPattern & pattern)

             {

                 if (!isPattern (pattern.x_id))

                 {

                     if (freeidx.size ())

                     {

                         std_hash_set < uint32_t >::iterator it = freeidx.begin ();

                         patterns[*it] = pattern;

                         x_id2rank[pattern.x_id] = *it;

                         freeidx.erase (it);

                     }

                     else

                     {

                         patterns.push_back (pattern);

                         x_id2rank[pattern.x_id] = patterns.size () - 1;

                     }

                 }

                 else

                 {

                     int32_t rank = getPatternRank (pattern.x_id);

                     patterns[rank] = pattern;

                 }

             }


             void remove (uint32_t i)

             {

                 x_id2rank[patterns[i].x_id] = 0;

                 patterns[i].clear ();

                 freeidx.insert (i);

             }


             bool empty () const

             {

                 return patterns.size () == freeidx.size ();

             }


             uint32_t size () const

             {

                 return patterns.size () - freeidx.size ();

             }


             LaRankPattern & sample ()

             {

                 ASSERT (!empty ())

                 while (true)

                 {

                     uint32_t r = CMath::random(uint32_t(0), uint32_t(patterns.size ()-1));

                     if (patterns[r].exists ())

                         return patterns[r];

                 }

                 return patterns[0];

             }


             uint32_t getPatternRank (int32_t x_id)

             {

                 return x_id2rank[x_id];

             }


             bool isPattern (int32_t x_id)

             {

                 return x_id2rank[x_id] != 0;

             }


             LaRankPattern & getPattern (int32_t x_id)

             {

                 uint32_t rank = x_id2rank[x_id];

                 return patterns[rank];

             }


             uint32_t maxcount () const

             {

                 return patterns.size ();

             }


             LaRankPattern & operator [] (uint32_t i)

             {

                 return patterns[i];

             }


             const LaRankPattern & operator [] (uint32_t i) const

             {

                 return patterns[i];

             }


         private:

             std_hash_set < uint32_t >freeidx;

             std::vector < LaRankPattern > patterns;

             std_hash_map < int32_t, uint32_t >x_id2rank;

     };


 #endif // DOXYGEN_SHOULD_SKIP_THIS


     class CLaRank:  public CMulticlassSVM

     {

         public:

             CLaRank ();


             CLaRank(float64_t C, CKernel* k, CLabels* lab);


             virtual ~CLaRank ();


             // LEARNING FUNCTION: add new patterns and run optimization steps

             // selected with adaptative schedule

             virtual int32_t add (int32_t x_id, int32_t yi);


             // PREDICTION FUNCTION: main function in la_rank_classify

             virtual int32_t predict (int32_t x_id);


             virtual void destroy ();


             // Compute Duality gap (costly but used in stopping criteria in batch mode)

             virtual float64_t computeGap ();


             // Nuber of classes so far

             virtual uint32_t getNumOutputs () const;


             // Number of Support Vectors

             int32_t getNSV ();


             // Norm of the parameters vector

             float64_t computeW2 ();


             // Compute Dual objective value

             float64_t getDual ();


             virtual EMachineType get_classifier_type() { return CT_LARANK; }


             virtual const char* get_name() const { return "LaRank"; }


             void set_batch_mode(bool enable) { batch_mode=enable; };

             bool get_batch_mode() { return batch_mode; };

             void set_tau(float64_t t) { tau=t; };

             float64_t get_tau() { return tau; };


             void set_max_iteration(int32_t max_iter);


             int32_t get_max_iteration() { return max_iteration; }


         protected:

             bool train_machine(CFeatures* data);


         private:

             /*

              ** MAIN DARK OPTIMIZATION PROCESSES

              */


             // Hash Table used to store the different outputs

             typedef std_hash_map < int32_t, LaRankOutput > outputhash_t;    // class index -> LaRankOutput


             outputhash_t outputs;


             LaRankOutput *getOutput (int32_t index);


             //

             LaRankPatterns patterns;


             // Parameters

             int32_t nb_seen_examples;

             int32_t nb_removed;


             // Numbers of each operation performed so far

             int32_t n_pro;

             int32_t n_rep;

             int32_t n_opt;


             // Running estimates for each operations

             float64_t w_pro;

             float64_t w_rep;

             float64_t w_opt;


             int32_t y0;

             float64_t m_dual;


             struct outputgradient_t

             {

                 outputgradient_t (int32_t result_output, float64_t result_gradient)

                     : output (result_output), gradient (result_gradient) {}

                 outputgradient_t ()

                     : output (0), gradient (0) {}


                 int32_t output;

                 float64_t gradient;


                 bool operator < (const outputgradient_t & og) const

                 {

                     return gradient > og.gradient;

                 }

             };


             //3 types of operations in LaRank

             enum process_type

             {

                 processNew,

                 processOld,

                 processOptimize

             };


             struct process_return_t

             {

                 process_return_t (float64_t dual, int32_t yprediction)

                     : dual_increase (dual), ypred (yprediction) {}

                 process_return_t () {}

                 float64_t dual_increase;

                 int32_t ypred;

             };


             // IMPORTANT Main SMO optimization step

             process_return_t process (const LaRankPattern & pattern, process_type ptype);


             // ProcessOld

             float64_t reprocess ();


             // Optimize

             float64_t optimize ();


             // remove patterns and return the number of patterns that were removed

             uint32_t cleanup ();


         protected:


             std_hash_set < int32_t >classes;


             inline uint32_t class_count () const

             {

                 return classes.size ();

             }


             float64_t tau;


             int32_t nb_train;

             int64_t cache;

             bool batch_mode;


             int32_t step;


             int32_t max_iteration;

     };

 }

 #endif // LARANK_H

shogun::CLaRank::get_name
virtual const char * get_name() const
Definition: LaRank.h:379

shogun::EMachineType
EMachineType
Definition: Machine.h:33

shogun::CLaRank::batch_mode
bool batch_mode
whether to use online learning or batch training
Definition: LaRank.h:508

shogun::operator<
bool operator<(const BaseTag &first, const BaseTag &second)
Definition: basetag.h:125

shogun::CLaRank::destroy
virtual void destroy()
Definition: LaRank.cpp:803

shogun::CLaRank::getNSV
int32_t getNSV()
Definition: LaRank.cpp:858

shogun::CT_LARANK
Definition: Machine.h:69

shogun::CLabels
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:43

shogun::CLaRank::cache
int64_t cache
cache
Definition: LaRank.h:506

shogun::CLaRank::train_machine
bool train_machine(CFeatures *data)
Definition: LaRank.cpp:609

shogun::CLaRank::set_batch_mode
void set_batch_mode(bool enable)
Definition: LaRank.h:384

SGIO.h

shogun::CLaRank::tau
float64_t tau
tau
Definition: LaRank.h:501

shogun::CLaRank::max_iteration
int32_t max_iteration
Max number of iterations before training is stopped.
Definition: LaRank.h:514

update
void(* update)(float *foo, float bar)
Definition: JLCoverTree.h:529

Kernel.h

config.h

shogun::CLaRank::add
virtual int32_t add(int32_t x_id, int32_t yi)
Definition: LaRank.cpp:710

shogun::CLaRank::class_count
uint32_t class_count() const
class count
Definition: LaRank.h:495

shogun::CLaRank::get_classifier_type
virtual EMachineType get_classifier_type()
Definition: LaRank.h:376

shogun::CLaRank::get_tau
float64_t get_tau()
Definition: LaRank.h:394

shogun::CLaRank::computeGap
virtual float64_t computeGap()
Definition: LaRank.cpp:812

shogun::CMath::random
static uint64_t random()
Definition: Math.h:1019

shogun::CLaRank::computeW2
float64_t computeW2()
Definition: LaRank.cpp:871

shogun::CLaRank::nb_train
int32_t nb_train
nb train
Definition: LaRank.h:504

ASSERT
#define ASSERT(x)
Definition: SGIO.h:201

shogun::CLaRank::getDual
float64_t getDual()
Definition: LaRank.cpp:887

shogun::CMulticlassSVM
class MultiClassSVM
Definition: MulticlassSVM.h:28

shogun::CLaRank::~CLaRank
virtual ~CLaRank()
Definition: LaRank.cpp:604

float64_t
double float64_t
Definition: common.h:50

shogun::CLaRank::get_batch_mode
bool get_batch_mode()
Definition: LaRank.h:386

shogun::CLaRank::getNumOutputs
virtual uint32_t getNumOutputs() const
Definition: LaRank.cpp:843

float32_t
float float32_t
Definition: common.h:49

shogun
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18

MulticlassSVM.h

shogun::CFeatures
The class Features is the base class of all feature objects.
Definition: Features.h:68

shogun::CLaRank::predict
virtual int32_t predict(int32_t x_id)
Definition: LaRank.cpp:787

shogun::CLaRank
the LaRank multiclass SVM machine This implementation uses LaRank algorithm from Bordes, Antoine, et al., 2007. "Solving multiclass support vector machines with LaRank."
Definition: LaRank.h:318

shogun::CKernel
The Kernel base class.
Definition: Kernel.h:159

shogun::CLaRank::set_tau
void set_tau(float64_t t)
Definition: LaRank.h:390

shogun::CLaRank::classes
std_hash_set< int32_t > classes
classes
Definition: LaRank.h:492

shogun::CLaRank::step
int32_t step
progress output
Definition: LaRank.h:511

shogun::CLaRank::get_max_iteration
int32_t get_max_iteration()
Definition: LaRank.h:404

shogun::CLaRank::CLaRank
CLaRank()
Definition: LaRank.cpp:587

shogun::CLaRank::set_max_iteration
void set_max_iteration(int32_t max_iter)
Definition: LaRank.cpp:849