SVM_libsvm.cpp

Go to the documentation of this file.
00001 /*
00002  * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  * notice, this list of conditions and the following disclaimer.
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  * notice, this list of conditions and the following disclaimer in the
00014  * documentation and/or other materials provided with the distribution.
00015  *
00016  * 3. Neither name of copyright holders nor the names of its contributors
00017  * may be used to endorse or promote products derived from this software
00018  * without specific prior written permission.
00019  *
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg
00034  */
00035 
00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00037 
00038 #include "lib/memory.h"
00039 #include "classifier/svm/SVM_libsvm.h"
00040 #include "kernel/Kernel.h"
00041 #include "lib/io.h"
00042 #include "lib/Time.h"
00043 #include "lib/Signal.h"
00044 #include "lib/common.h"
00045 #include "lib/Mathematics.h"
00046 
00047 #include <stdio.h>
00048 #include <stdlib.h>
00049 #include <ctype.h>
00050 #include <float.h>
00051 #include <string.h>
00052 #include <stdarg.h>
00053 
00054 namespace shogun
00055 {
00056 
00057 typedef KERNELCACHE_ELEM Qfloat;
00058 typedef float64_t schar;
00059 
// Allocate a fresh T[n] and copy the n source elements into it,
// converting element-wise from S to T.  The caller owns the result
// (release with delete[]).
//
// Element-wise assignment replaces the previous raw
// memcpy(sizeof(T)*n): the byte copy read past the end of src (and
// produced garbage) whenever sizeof(S) != sizeof(T).  For the common
// S == T case the compiler emits the same bulk copy.
template <class S, class T> inline void clone(T*& dst, S* src, int32_t n)
{
    dst = new T[n];
    for (int32_t i=0; i<n; i++)
        dst[i] = src[i];
}
00065 #define INF HUGE_VAL
00066 #define TAU 1e-12
00067 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00068 
00069 class QMatrix;
00070 class SVC_QMC;
00071 
00072 //
00073 // Kernel Cache
00074 //
00075 // l is the number of total data items
00076 // size is the cache size limit in bytes
00077 //
// Least-recently-used cache mapping a column index to its cached
// kernel-column data.
class Cache
{
public:
    Cache(int32_t l, int64_t size);
    ~Cache();

    // request data [0,len)
    // return some position p where [p,len) need to be filled
    // (p >= len if nothing needs to be filled)
    int32_t get_data(const int32_t index, Qfloat **data, int32_t len);
    void swap_index(int32_t i, int32_t j);  // future_option

private:
    int32_t l;      // number of cacheable columns
    int64_t size;   // remaining free capacity, counted in Qfloat elements
    struct head_t
    {
        head_t *prev, *next;    // a circular list
        Qfloat *data;           // cached column data (NULL after eviction)
        int32_t len;        // data[0,len) is cached in this entry
    };

    head_t *head;       // one entry per column index
    head_t lru_head;    // sentinel of the circular LRU list; next = least recently used
    void lru_delete(head_t *h);    // unlink h from the LRU list
    void lru_insert(head_t *h);    // append h as most recently used
};

// Convert the byte budget 'size_' into a count of Qfloat elements,
// charging the head_t bookkeeping table against it.
Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_)
{
    head = (head_t *)calloc(l,sizeof(head_t));  // initialized to 0
    size /= sizeof(Qfloat);
    size -= l * sizeof(head_t) / sizeof(Qfloat);
    size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns
    lru_head.next = lru_head.prev = &lru_head;
}

Cache::~Cache()
{
    // every live column is linked into the LRU ring; free them all
    for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
        free(h->data);
    free(head);
}

void Cache::lru_delete(head_t *h)
{
    // delete from current location
    h->prev->next = h->next;
    h->next->prev = h->prev;
}

void Cache::lru_insert(head_t *h)
{
    // insert to last position
    h->next = &lru_head;
    h->prev = lru_head.prev;
    h->prev->next = h;
    h->next->prev = h;
}

int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len)
{
    head_t *h = &head[index];
    // temporarily unlink; re-inserted below as most recently used
    if(h->len) lru_delete(h);
    int32_t more = len - h->len;    // elements still missing

    if(more > 0)
    {
        // free old space: evict least-recently-used columns until the
        // requested growth fits into the remaining budget
        while(size < more)
        {
            head_t *old = lru_head.next;
            lru_delete(old);
            free(old->data);
            size += old->len;
            old->data = 0;
            old->len = 0;
        }

        // allocate new space
        h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
        size -= more;
        // after this swap h->len is the new length and 'len' holds the
        // old one, i.e. the position from which the caller must fill in
        CMath::swap(h->len,len);
    }

    lru_insert(h);
    *data = h->data;
    return len;
}

void Cache::swap_index(int32_t i, int32_t j)
{
    if(i==j) return;

    // swap the two columns' own cache entries
    if(head[i].len) lru_delete(&head[i]);
    if(head[j].len) lru_delete(&head[j]);
    CMath::swap(head[i].data,head[j].data);
    CMath::swap(head[i].len,head[j].len);
    if(head[i].len) lru_insert(&head[i]);
    if(head[j].len) lru_insert(&head[j]);

    // fix up entries i and j inside every other cached column
    if(i>j) CMath::swap(i,j);
    for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
    {
        if(h->len > i)
        {
            if(h->len > j)
                CMath::swap(h->data[i],h->data[j]);
            else
            {
                // give up: this column covers i but not j and cannot be
                // fixed consistently — evict it instead
                lru_delete(h);
                free(h->data);
                size += h->len;
                h->data = 0;
                h->len = 0;
            }
        }
    }
}
00198 
00199 //
00200 // Kernel evaluation
00201 //
00202 // the static method k_function is for doing single kernel evaluation
00203 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00204 // the member function get_Q is for getting one column from the Q Matrix
00205 //
// Abstract interface to the (implicit) l*l matrix Q used by the solver.
class QMatrix {
public:
    // return entries [0,len) of column 'column'
    virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
    // return the precomputed diagonal of Q
    virtual Qfloat *get_QD() const = 0;
    // exchange examples i and j in the underlying representation
    virtual void swap_index(int32_t i, int32_t j) const = 0;
    virtual ~QMatrix() {}

    // wall-clock training budget in seconds; only enforced when > 0
    float64_t max_train_time;
};
00215 
// QMatrix backed by a shogun CKernel object.
class LibSVMKernel: public QMatrix {
public:
    LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param);
    virtual ~LibSVMKernel();

    virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
    virtual Qfloat *get_QD() const = 0;
    virtual void swap_index(int32_t i, int32_t j) const // no so const...
    {
        CMath::swap(x[i],x[j]);
        if(x_square) CMath::swap(x_square[i],x_square[j]);
    }

    // evaluate k(i,j) by delegating to the shogun kernel;
    // svm_node::index is passed to CKernel as the example index
    inline float64_t kernel_function(int32_t i, int32_t j) const
    {
        return kernel->kernel(x[i]->index,x[j]->index);
    }

private:
    CKernel* kernel;        // shogun kernel doing the actual evaluation
    const svm_node **x;     // cloned array of example descriptors
    float64_t *x_square;    // never allocated in this adaptation; stays NULL

    // svm_parameter (retained but unused here; the CKernel computes k)
    const int32_t kernel_type;
    const int32_t degree;
    const float64_t gamma;
    const float64_t coef0;
};

// Clone the example array and take over the kernel object and the
// training-time budget from the parameter struct.
LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param)
: kernel_type(param.kernel_type), degree(param.degree),
 gamma(param.gamma), coef0(param.coef0)
{
    clone(x,x_,l);
    x_square = 0;
    kernel=param.kernel;
    max_train_time=param.max_train_time;
}

LibSVMKernel::~LibSVMKernel()
{
    delete[] x;
    delete[] x_square;  // delete[] on NULL is a no-op
}
00261 
00262 // Generalized SMO+SVMlight algorithm
00263 // Solves:
00264 //
00265 //  min 0.5(\alpha^T Q \alpha) + b^T \alpha
00266 //
00267 //      y^T \alpha = \delta
00268 //      y_i = +1 or -1
00269 //      0 <= alpha_i <= Cp for y_i = 1
00270 //      0 <= alpha_i <= Cn for y_i = -1
00271 //
00272 // Given:
00273 //
00274 //  Q, p, y, Cp, Cn, and an initial feasible point \alpha
00275 //  l is the size of vectors and matrices
00276 //  eps is the stopping tolerance
00277 //
00278 // solution will be put in \alpha, objective value will be put in obj
00279 //
00280 class Solver {
00281 public:
00282     Solver() {};
00283     virtual ~Solver() {};
00284 
00285     struct SolutionInfo {
00286         float64_t obj;
00287         float64_t rho;
00288         float64_t upper_bound_p;
00289         float64_t upper_bound_n;
00290         float64_t r;    // for Solver_NU
00291     };
00292 
00293     void Solve(
00294         int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_,
00295         float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps,
00296         SolutionInfo* si, int32_t shrinking, bool use_bias);
00297 
00298 protected:
00299     int32_t active_size;
00300     schar *y;
00301     float64_t *G;       // gradient of objective function
00302     enum { LOWER_BOUND, UPPER_BOUND, FREE };
00303     char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
00304     float64_t *alpha;
00305     const QMatrix *Q;
00306     const Qfloat *QD;
00307     float64_t eps;
00308     float64_t Cp,Cn;
00309     float64_t *p;
00310     int32_t *active_set;
00311     float64_t *G_bar;       // gradient, if we treat free variables as 0
00312     int32_t l;
00313     bool unshrink;  // XXX
00314 
00315     float64_t get_C(int32_t i)
00316     {
00317         return (y[i] > 0)? Cp : Cn;
00318     }
00319     void update_alpha_status(int32_t i)
00320     {
00321         if(alpha[i] >= get_C(i))
00322             alpha_status[i] = UPPER_BOUND;
00323         else if(alpha[i] <= 0)
00324             alpha_status[i] = LOWER_BOUND;
00325         else alpha_status[i] = FREE;
00326     }
00327     bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; }
00328     bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; }
00329     bool is_free(int32_t i) { return alpha_status[i] == FREE; }
00330     void swap_index(int32_t i, int32_t j);
00331     void reconstruct_gradient();
00332     virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
00333     virtual float64_t calculate_rho();
00334     virtual void do_shrinking();
00335 private:
00336     bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2);
00337 };
00338 
00339 void Solver::swap_index(int32_t i, int32_t j)
00340 {
00341     Q->swap_index(i,j);
00342     CMath::swap(y[i],y[j]);
00343     CMath::swap(G[i],G[j]);
00344     CMath::swap(alpha_status[i],alpha_status[j]);
00345     CMath::swap(alpha[i],alpha[j]);
00346     CMath::swap(p[i],p[j]);
00347     CMath::swap(active_set[i],active_set[j]);
00348     CMath::swap(G_bar[i],G_bar[j]);
00349 }
00350 
void Solver::reconstruct_gradient()
{
    // reconstruct inactive elements of G from G_bar and free variables

    if(active_size == l) return;

    int32_t i,j;
    int32_t nr_free = 0;

    // restart the inactive gradients from the bounded-variable part
    for(j=active_size;j<l;j++)
        G[j] = G_bar[j] + p[j];

    for(j=0;j<active_size;j++)
        if(is_free(j))
            nr_free++;

    // two equivalent ways to add the free variables' contribution;
    // pick the one that touches fewer kernel entries
    if (nr_free*l > 2*active_size*(l-active_size))
    {
        // iterate over the inactive rows
        for(i=active_size;i<l;i++)
        {
            const Qfloat *Q_i = Q->get_Q(i,active_size);
            for(j=0;j<active_size;j++)
                if(is_free(j))
                    G[i] += alpha[j] * Q_i[j];
        }
    }
    else
    {
        // iterate over the free active columns
        for(i=0;i<active_size;i++)
            if(is_free(i))
            {
                const Qfloat *Q_i = Q->get_Q(i,l);
                float64_t alpha_i = alpha[i];
                for(j=active_size;j<l;j++)
                    G[j] += alpha_i * Q_i[j];
            }
    }
}
00389 
// Run the generalized SMO loop.
//
// Makes working copies of p, y and alpha, builds the initial gradient
// G (and G_bar, the contribution of upper-bounded variables), then
// repeatedly selects a violating pair (i,j), solves the two-variable
// subproblem analytically and updates the gradients — until
// select_working_set reports a duality gap below eps, the computation
// is cancelled, or the kernel's max_train_time budget is exhausted.
// rho, the objective and the bounds go into *p_si; the final alphas
// are written back to p_alpha in the caller's original ordering.
void Solver::Solve(
    int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
    const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn,
    float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
{
    this->l = p_l;
    this->Q = &p_Q;
    QD=Q->get_QD();
    // private working copies; released at the end of this function
    clone(p, p_p,l);
    clone(y, p_y,l);
    clone(alpha,p_alpha,l);
    this->Cp = p_Cp;
    this->Cn = p_Cn;
    this->eps = p_eps;
    unshrink = false;

    // initialize alpha_status
    {
        alpha_status = new char[l];
        for(int32_t i=0;i<l;i++)
            update_alpha_status(i);
    }

    // initialize active set (for shrinking)
    {
        active_set = new int32_t[l];
        for(int32_t i=0;i<l;i++)
            active_set[i] = i;
        active_size = l;
    }

    // initialize gradient
    CSignal::clear_cancel();
    CTime start_time;
    {
        G = new float64_t[l];
        G_bar = new float64_t[l];
        int32_t i;
        for(i=0;i<l;i++)
        {
            G[i] = p_p[i];
            G_bar[i] = 0;
        }
        SG_SINFO("Computing gradient for initial set of non-zero alphas\n");
        //CMath::display_vector(alpha, l, "alphas");
        // only non-zero alphas contribute; a zero start vector skips this
        for(i=0;i<l && !CSignal::cancel_computations(); i++)
        {
            if(!is_lower_bound(i))
            {
                const Qfloat *Q_i = Q->get_Q(i,l);
                float64_t alpha_i = alpha[i];
                int32_t j;
                for(j=0;j<l;j++)
                    G[j] += alpha_i*Q_i[j];
                if(is_upper_bound(i))
                    for(j=0;j<l;j++)
                        G_bar[j] += get_C(i) * Q_i[j];
            }
            SG_SPROGRESS(i, 0, l);
        }
        SG_SDONE();
    }

    // optimization step

    int32_t iter = 0;
    int32_t counter = CMath::min(l,1000)+1;

    while (!CSignal::cancel_computations())
    {
        // honor the optional wall-clock training budget
        if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time)
          break;

        // show progress and do shrinking

        if(--counter == 0)
        {
            counter = CMath::min(l,1000);
            if(shrinking) do_shrinking();
            //SG_SINFO(".");
        }

        int32_t i,j;
        float64_t gap;
        if(select_working_set(i,j, gap)!=0)
        {
            // reconstruct the whole gradient
            reconstruct_gradient();
            // reset active set size and check
            active_size = l;
            //SG_SINFO("*");
            if(select_working_set(i,j, gap)!=0)
                break;          // optimal on the full problem
            else
                counter = 1;    // do shrinking next iteration
        }

        SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);

        ++iter;

        // update alpha[i] and alpha[j], handle bounds carefully

        const Qfloat *Q_i = Q->get_Q(i,active_size);
        const Qfloat *Q_j = Q->get_Q(j,active_size);

        float64_t C_i = get_C(i);
        float64_t C_j = get_C(j);

        float64_t old_alpha_i = alpha[i];
        float64_t old_alpha_j = alpha[j];

        if (!use_bias)
        {
            // without a bias there is no equality constraint, so the
            // 2x2 subproblem is solved directly and clipped to the box
            double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j];
            double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j];
            double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j];
            double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det;
            alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i));
            double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det;
            alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j));

            // if one variable hit a bound, re-optimize the other alone
            if (alpha_i==0 || alpha_i == C_i)
                alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j]));
            if (alpha_j==0 || alpha_j == C_j)
                alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i]));

            alpha[i]=alpha_i; alpha[j]=alpha_j;
        }
        else
        {
            if(y[i]!=y[j])
            {
                // opposite labels: alpha_i - alpha_j is conserved
                float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
                if (quad_coef <= 0)
                    quad_coef = TAU;    // guard against non-PSD kernels
                float64_t delta = (-G[i]-G[j])/quad_coef;
                float64_t diff = alpha[i] - alpha[j];
                alpha[i] += delta;
                alpha[j] += delta;

                // clip the pair back into the feasible box
                if(diff > 0)
                {
                    if(alpha[j] < 0)
                    {
                        alpha[j] = 0;
                        alpha[i] = diff;
                    }
                }
                else
                {
                    if(alpha[i] < 0)
                    {
                        alpha[i] = 0;
                        alpha[j] = -diff;
                    }
                }
                if(diff > C_i - C_j)
                {
                    if(alpha[i] > C_i)
                    {
                        alpha[i] = C_i;
                        alpha[j] = C_i - diff;
                    }
                }
                else
                {
                    if(alpha[j] > C_j)
                    {
                        alpha[j] = C_j;
                        alpha[i] = C_j + diff;
                    }
                }
            }
            else
            {
                // equal labels: alpha_i + alpha_j is conserved
                float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
                if (quad_coef <= 0)
                    quad_coef = TAU;    // guard against non-PSD kernels
                float64_t delta = (G[i]-G[j])/quad_coef;
                float64_t sum = alpha[i] + alpha[j];
                alpha[i] -= delta;
                alpha[j] += delta;

                // clip the pair back into the feasible box
                if(sum > C_i)
                {
                    if(alpha[i] > C_i)
                    {
                        alpha[i] = C_i;
                        alpha[j] = sum - C_i;
                    }
                }
                else
                {
                    if(alpha[j] < 0)
                    {
                        alpha[j] = 0;
                        alpha[i] = sum;
                    }
                }
                if(sum > C_j)
                {
                    if(alpha[j] > C_j)
                    {
                        alpha[j] = C_j;
                        alpha[i] = sum - C_j;
                    }
                }
                else
                {
                    if(alpha[i] < 0)
                    {
                        alpha[i] = 0;
                        alpha[j] = sum;
                    }
                }
            }
        }

        // update G

        float64_t delta_alpha_i = alpha[i] - old_alpha_i;
        float64_t delta_alpha_j = alpha[j] - old_alpha_j;

        for(int32_t k=0;k<active_size;k++)
        {
            G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
        }

        // update alpha_status and G_bar

        {
            bool ui = is_upper_bound(i);
            bool uj = is_upper_bound(j);
            update_alpha_status(i);
            update_alpha_status(j);
            int32_t k;
            // G_bar tracks the gradient contribution of upper-bounded
            // variables; adjust it when i or j crossed that boundary
            if(ui != is_upper_bound(i))
            {
                Q_i = Q->get_Q(i,l);
                if(ui)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_i * Q_i[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_i * Q_i[k];
            }

            if(uj != is_upper_bound(j))
            {
                Q_j = Q->get_Q(j,l);
                if(uj)
                    for(k=0;k<l;k++)
                        G_bar[k] -= C_j * Q_j[k];
                else
                    for(k=0;k<l;k++)
                        G_bar[k] += C_j * Q_j[k];
            }
        }

#ifdef MCSVM_DEBUG
        // calculate objective value
        {
            float64_t v = 0;
            for(i=0;i<l;i++)
                v += alpha[i] * (G[i] + p[i]);

            p_si->obj = v/2;

            float64_t primal=0;
            //float64_t gap=100000;
            SG_SPRINT("dual obj=%f primal obf=%f gap=%f\n", v/2, primal, gap);
        }
#endif
    }

    // calculate rho

    if (!use_bias)
        p_si->rho = 0;
    else
        p_si->rho = calculate_rho();

    // calculate objective value
    {
        float64_t v = 0;
        int32_t i;
        for(i=0;i<l;i++)
            v += alpha[i] * (G[i] + p[i]);

        p_si->obj = v/2;
    }

    // put back the solution, undoing the active-set permutation
    {
        for(int32_t i=0;i<l;i++)
            p_alpha[active_set[i]] = alpha[i];
    }

    p_si->upper_bound_p = Cp;
    p_si->upper_bound_n = Cn;

    SG_SINFO("\noptimization finished, #iter = %d\n",iter);

    delete[] p;
    delete[] y;
    delete[] alpha;
    delete[] alpha_status;
    delete[] active_set;
    delete[] G;
    delete[] G_bar;
}
00702 
00703 // return 1 if already optimal, return 0 otherwise
// return 1 if already optimal, return 0 otherwise
int32_t Solver::select_working_set(
    int32_t &out_i, int32_t &out_j, float64_t &gap)
{
    // return i,j such that
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficient <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)

    float64_t Gmax = -INF;      // max violation over I_up
    float64_t Gmax2 = -INF;     // max violation over I_low
    int32_t Gmax_idx = -1;
    int32_t Gmin_idx = -1;
    float64_t obj_diff_min = INF;

    // first pass: pick i as the most violating index in I_up
    for(int32_t t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmax)
                {
                    Gmax = -G[t];
                    Gmax_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmax)
                {
                    Gmax = G[t];
                    Gmax_idx = t;
                }
        }

    int32_t i = Gmax_idx;
    const Qfloat *Q_i = NULL;
    if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
        Q_i = Q->get_Q(i,active_size);

    // second pass: pick j in I_low maximizing the second-order
    // estimate of the objective decrease for the pair (i,j)
    for(int32_t j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                float64_t grad_diff=Gmax+G[j];
                if (G[j] >= Gmax2)
                    Gmax2 = G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                float64_t grad_diff= Gmax-G[j];
                if (-G[j] >= Gmax2)
                    Gmax2 = -G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // Gmax + Gmax2 is the (scaled) duality gap used as stopping criterion
    gap=Gmax+Gmax2;
    if(gap < eps)
        return 1;

    out_i = Gmax_idx;
    out_j = Gmin_idx;
    return 0;
}
00804 
00805 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2)
00806 {
00807     if(is_upper_bound(i))
00808     {
00809         if(y[i]==+1)
00810             return(-G[i] > Gmax1);
00811         else
00812             return(-G[i] > Gmax2);
00813     }
00814     else if(is_lower_bound(i))
00815     {
00816         if(y[i]==+1)
00817             return(G[i] > Gmax2);
00818         else
00819             return(G[i] > Gmax1);
00820     }
00821     else
00822         return(false);
00823 }
00824 
00825 
// Shrink the active set: variables stuck at a bound (per be_shrunk)
// are moved behind active_size and ignored until reactivation.  When
// the gap first drops below 10*eps the full problem is restored once
// ('unshrink') so earlier shrinking mistakes can be corrected.
void Solver::do_shrinking()
{
    int32_t i;
    float64_t Gmax1 = -INF;     // max { -y_i * grad(f)_i | i in I_up(\alpha) }
    float64_t Gmax2 = -INF;     // max { y_i * grad(f)_i | i in I_low(\alpha) }

    // find maximal violating pair first
    for(i=0;i<active_size;i++)
    {
        if(y[i]==+1)
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax1)
                    Gmax1 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax2)
                    Gmax2 = G[i];
            }
        }
        else
        {
            if(!is_upper_bound(i))
            {
                if(-G[i] >= Gmax2)
                    Gmax2 = -G[i];
            }
            if(!is_lower_bound(i))
            {
                if(G[i] >= Gmax1)
                    Gmax1 = G[i];
            }
        }
    }

    if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
    }

    // compact the active set: swap each shrinkable variable with the
    // last non-shrinkable one and shorten active_size
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
00885 
// Compute the bias term rho from the KKT conditions: the average of
// y_i*G_i over the free variables, or — when none are free — the
// midpoint of the feasible interval [lb, ub] spanned by the bounded
// variables.
float64_t Solver::calculate_rho()
{
    float64_t r;
    int32_t nr_free = 0;
    float64_t ub = INF, lb = -INF, sum_free = 0;
    for(int32_t i=0;i<active_size;i++)
    {
        float64_t yG = y[i]*G[i];

        if(is_upper_bound(i))
        {
            if(y[i]==-1)
                ub = CMath::min(ub,yG);
            else
                lb = CMath::max(lb,yG);
        }
        else if(is_lower_bound(i))
        {
            if(y[i]==+1)
                ub = CMath::min(ub,yG);
            else
                lb = CMath::max(lb,yG);
        }
        else
        {
            ++nr_free;
            sum_free += yG;
        }
    }

    if(nr_free>0)
        r = sum_free/nr_free;
    else
        r = (ub+lb)/2;

    return r;
}
00923 
00924 
00925 //
00926 //Solve with individually weighted examples
00927 //
class WeightedSolver : public Solver
{

public:

    // cost_vec: per-example regularization constants C_i; the array is
    // not copied and must outlive this solver
    WeightedSolver(float64_t* cost_vec)
    {

        this->Cs = cost_vec;

    }

    // per-example cost; NOTE(review): this override is only effective
    // where get_C is dispatched virtually from the base-class solver
    // loop — verify Solver declares get_C virtual
    virtual float64_t get_C(int32_t i)
    {

        return Cs[i];
    }

protected:

  float64_t* Cs;  // borrowed array of per-example costs

};
00951 
00952 
00953 //
00954 // Solver for nu-svm classification and regression
00955 //
00956 // additional constraint: e^T \alpha = constant
00957 //
class Solver_NU : public Solver
{
public:
    Solver_NU() {}
    // Same interface as Solver::Solve; additionally remembers the
    // SolutionInfo pointer for use by the NU-specific helpers below.
    void Solve(
        int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
        const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
        float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
    {
        this->si = p_si;
        Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,
                shrinking,use_bias);
    }
private:
    SolutionInfo *si;   // stored by Solve for the overrides below
    // NU variant maintains separate maxima per label class
    int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
    float64_t calculate_rho();
    bool be_shrunk(
        int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
        float64_t Gmax4);
    void do_shrinking();
};
00980 
00981 // return 1 if already optimal, return 0 otherwise
int32_t Solver_NU::select_working_set(
    int32_t &out_i, int32_t &out_j, float64_t &gap)
{
    // return i,j such that y_i = y_j and
    // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
    // j: minimizes the decrease of obj value
    //    (if quadratic coefficient <= 0, replace it with tau)
    //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
    //
    // The nu formulation constrains i and j to share the same label, so the
    // maximal violation is tracked separately for the +1 and -1 classes.

    float64_t Gmaxp = -INF;   // largest -G among positive examples in I_up
    float64_t Gmaxp2 = -INF;  // largest  G among positive examples in I_low
    int32_t Gmaxp_idx = -1;

    float64_t Gmaxn = -INF;   // same pair of maxima for the negative class
    float64_t Gmaxn2 = -INF;
    int32_t Gmaxn_idx = -1;

    int32_t Gmin_idx = -1;
    float64_t obj_diff_min = INF;

    // first pass: per-class maximal violating candidate for index i
    for(int32_t t=0;t<active_size;t++)
        if(y[t]==+1)
        {
            if(!is_upper_bound(t))
                if(-G[t] >= Gmaxp)
                {
                    Gmaxp = -G[t];
                    Gmaxp_idx = t;
                }
        }
        else
        {
            if(!is_lower_bound(t))
                if(G[t] >= Gmaxn)
                {
                    Gmaxn = G[t];
                    Gmaxn_idx = t;
                }
        }

    int32_t ip = Gmaxp_idx;
    int32_t in = Gmaxn_idx;
    const Qfloat *Q_ip = NULL;
    const Qfloat *Q_in = NULL;
    if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
        Q_ip = Q->get_Q(ip,active_size);
    if(in != -1)
        Q_in = Q->get_Q(in,active_size);

    // second pass: pick j (same class as its i) minimizing the decrease of
    // the objective; TAU substitutes non-positive quadratic coefficients
    for(int32_t j=0;j<active_size;j++)
    {
        if(y[j]==+1)
        {
            if (!is_lower_bound(j))
            {
                float64_t grad_diff=Gmaxp+G[j];
                if (G[j] >= Gmaxp2)
                    Gmaxp2 = G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
        else
        {
            if (!is_upper_bound(j))
            {
                float64_t grad_diff=Gmaxn-G[j];
                if (-G[j] >= Gmaxn2)
                    Gmaxn2 = -G[j];
                if (grad_diff > 0)
                {
                    float64_t obj_diff;
                    float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
                    if (quad_coef > 0)
                        obj_diff = -(grad_diff*grad_diff)/quad_coef;
                    else
                        obj_diff = -(grad_diff*grad_diff)/TAU;

                    if (obj_diff <= obj_diff_min)
                    {
                        Gmin_idx=j;
                        obj_diff_min = obj_diff;
                    }
                }
            }
        }
    }

    // stopping criterion: the larger of the two per-class violations
    gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
    if(gap < eps)
        return 1;

    // report the pair from whichever class produced the chosen j
    if (y[Gmin_idx] == +1)
        out_i = Gmaxp_idx;
    else
        out_i = Gmaxn_idx;
    out_j = Gmin_idx;

    return 0;
}
01095 
01096 bool Solver_NU::be_shrunk(
01097     int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
01098     float64_t Gmax4)
01099 {
01100     if(is_upper_bound(i))
01101     {
01102         if(y[i]==+1)
01103             return(-G[i] > Gmax1);
01104         else
01105             return(-G[i] > Gmax4);
01106     }
01107     else if(is_lower_bound(i))
01108     {
01109         if(y[i]==+1)
01110             return(G[i] > Gmax2);
01111         else
01112             return(G[i] > Gmax3);
01113     }
01114     else
01115         return(false);
01116 }
01117 
// Shrink the active set: drop variables that cannot change the solution
// according to the per-class maximal violations, and un-shrink everything
// once the estimated gap gets close to eps.
void Solver_NU::do_shrinking()
{
    float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
    float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
    float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
    float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }

    // find maximal violating pair first
    int32_t i;
    for(i=0;i<active_size;i++)
    {
        if(!is_upper_bound(i))
        {
            if(y[i]==+1)
            {
                if(-G[i] > Gmax1) Gmax1 = -G[i];
            }
            else    if(-G[i] > Gmax4) Gmax4 = -G[i];
        }
        if(!is_lower_bound(i))
        {
            if(y[i]==+1)
            {
                if(G[i] > Gmax2) Gmax2 = G[i];
            }
            else    if(G[i] > Gmax3) Gmax3 = G[i];
        }
    }

    // once close to convergence, reconstruct the full gradient and restore
    // all variables so shrinking mistakes cannot affect the final result
    if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
    {
        unshrink = true;
        reconstruct_gradient();
        active_size = l;
    }

    // compact the active set in place: for each shrinkable i, pull in the
    // last non-shrinkable element from the tail to fill its slot
    for(i=0;i<active_size;i++)
        if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
        {
            active_size--;
            while (active_size > i)
            {
                if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
                {
                    swap_index(i,active_size);
                    break;
                }
                active_size--;
            }
        }
}
01169 
01170 float64_t Solver_NU::calculate_rho()
01171 {
01172     int32_t nr_free1 = 0,nr_free2 = 0;
01173     float64_t ub1 = INF, ub2 = INF;
01174     float64_t lb1 = -INF, lb2 = -INF;
01175     float64_t sum_free1 = 0, sum_free2 = 0;
01176 
01177     for(int32_t i=0; i<active_size; i++)
01178     {
01179         if(y[i]==+1)
01180         {
01181             if(is_upper_bound(i))
01182                 lb1 = CMath::max(lb1,G[i]);
01183             else if(is_lower_bound(i))
01184                 ub1 = CMath::min(ub1,G[i]);
01185             else
01186             {
01187                 ++nr_free1;
01188                 sum_free1 += G[i];
01189             }
01190         }
01191         else
01192         {
01193             if(is_upper_bound(i))
01194                 lb2 = CMath::max(lb2,G[i]);
01195             else if(is_lower_bound(i))
01196                 ub2 = CMath::min(ub2,G[i]);
01197             else
01198             {
01199                 ++nr_free2;
01200                 sum_free2 += G[i];
01201             }
01202         }
01203     }
01204 
01205     float64_t r1,r2;
01206     if(nr_free1 > 0)
01207         r1 = sum_free1/nr_free1;
01208     else
01209         r1 = (ub1+lb1)/2;
01210 
01211     if(nr_free2 > 0)
01212         r2 = sum_free2/nr_free2;
01213     else
01214         r2 = (ub2+lb2)/2;
01215 
01216     si->r = (r1+r2)/2;
01217     return (r1-r2)/2;
01218 }
01219 
// Q matrix for the multiclass nu-SVM: the kernel value is scaled by
// factor*(nr_class-1) for same-class pairs and by -factor for pairs from
// different classes. Columns are served through an LRU cache.
class SVC_QMC: public LibSVMKernel
{
public:
    SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac)
    :LibSVMKernel(prob.l, prob.x, param)
    {
        nr_class=n_class;
        factor=fac;
        clone(y,y_,prob.l);
        cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
        QD = new Qfloat[prob.l];
        for(int32_t i=0;i<prob.l;i++)
        {
            // diagonal entries are always a "same class" pair
            QD[i]= factor*(nr_class-1)*kernel_function(i,i);
        }
    }

    // First len entries of column i of Q; only the part the cache could not
    // provide is recomputed from the kernel.
    Qfloat *get_Q(int32_t i, int32_t len) const
    {
        Qfloat *data;
        int32_t start;
        if((start = cache->get_data(i,&data,len)) < len)
        {
            for(int32_t j=start;j<len;j++)
            {
                if (y[i]==y[j])
                    data[j] = factor*(nr_class-1)*kernel_function(i,j);
                else
                    data[j] = -factor*kernel_function(i,j);
            }
        }
        return data;
    }

    // Undo the class-dependent scaling on a single entry, recovering
    // factor*K(i,j) up to sign from a scaled Q value.
    inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j)
    {
        if (y[i]==y[j])
            return Q/(nr_class-1);
        else
            return -Q;
    }

    Qfloat *get_QD() const
    {
        return QD;
    }

    // Keep cache, kernel bookkeeping, labels and diagonal in sync.
    void swap_index(int32_t i, int32_t j) const
    {
        cache->swap_index(i,j);
        LibSVMKernel::swap_index(i,j);
        CMath::swap(y[i],y[j]);
        CMath::swap(QD[i],QD[j]);
    }

    ~SVC_QMC()
    {
        delete[] y;
        delete cache;
        delete[] QD;
    }
private:
    float64_t factor;   // global kernel scaling factor
    float64_t nr_class; // number of classes (stored as float for the scaling math)
    schar *y;           // cloned class labels, owned
    Cache *cache;       // kernel column cache, owned
    Qfloat *QD;         // diagonal of Q, owned
};
01288 
01289 //
01290 // Solver for nu-svm classification and regression
01291 //
01292 // additional constraint: e^T \alpha = constant
01293 //
class Solver_NUMC : public Solver
{
public:
    // @param n_class number of classes
    // @param svm_nu nu regularization parameter
    Solver_NUMC(int32_t n_class, float64_t svm_nu)
    {
        nr_class=n_class;
        nu=svm_nu;
    }

    // Runs the base SMO solve, keeping the SolutionInfo pointer around.
    void Solve(
        int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
        const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
        float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
    {
        this->si = p_si;
        Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias);
    }
    // Derives the primal objective, per-class biases and weight norms from
    // the dual solution (defined below in this file).
    float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw);

private:
    SolutionInfo *si; // not owned
    int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
    float64_t calculate_rho();
    bool be_shrunk(
        int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
        float64_t Gmax4);
    void do_shrinking();

private:
    int32_t nr_class;  // number of classes
    float64_t  nu;     // nu parameter
};
01326 
01327 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw)
01328 {
01329     clone(y, p_y,l);
01330     clone(alpha,p_alpha,l);
01331 
01332     alpha_status = new char[l];
01333     for(int32_t i=0;i<l;i++)
01334         update_alpha_status(i);
01335 
01336     float64_t* class_count = new float64_t[nr_class];
01337     float64_t* outputs = new float64_t[l];
01338 
01339     for (int32_t i=0; i<nr_class; i++)
01340     {
01341         class_count[i]=0;
01342         biases[i+1]=0;
01343     }
01344 
01345     for (int32_t i=0; i<active_size; i++)
01346     {
01347         update_alpha_status(i);
01348         if(!is_upper_bound(i) && !is_lower_bound(i))
01349             class_count[(int32_t) y[i]]++;
01350     }
01351 
01352     //CMath::display_vector(class_count, nr_class, "class_count");
01353 
01354     float64_t mu=((float64_t) nr_class)/(nu*l);
01355     //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu);
01356 
01357     float64_t rho=0;
01358     float64_t quad=0;
01359     float64_t* zero_counts  = new float64_t[nr_class];
01360     float64_t normwc_const = 0;
01361 
01362     for (int32_t i=0; i<nr_class; i++)
01363     {
01364         zero_counts[i]=-INF;
01365         normwcw[i]=0;
01366     }
01367 
01368     for (int32_t i=0; i<active_size; i++)
01369     {
01370         float64_t sum_free=0;
01371         float64_t sum_atbound=0;
01372         float64_t sum_zero_count=0;
01373 
01374         Qfloat* Q_i = Q->get_Q(i,active_size);
01375         outputs[i]=0;
01376 
01377         for (int j=0; j<active_size; j++)
01378         {
01379             quad+= alpha[i]*alpha[j]*Q_i[j];
01380             float64_t tmp= alpha[j]*Q_i[j]/mu;
01381 
01382             if(!is_upper_bound(i) && !is_lower_bound(i))
01383                 sum_free+=tmp;
01384             else
01385                 sum_atbound+=tmp;
01386 
01387             if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i])
01388                 sum_zero_count+= tmp;
01389 
01390             SVC_QMC* QMC=(SVC_QMC*) Q;
01391             float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j);
01392             if (y[i]==y[j])
01393                 normwcw[(int32_t) y[i]]+=norm_tmp;
01394 
01395             normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp;
01396             normwc_const+=norm_tmp;
01397         }
01398 
01399         if (class_count[(int32_t) y[i]] == 0)
01400         {
01401             if (zero_counts[(int32_t) y[i]]<sum_zero_count)
01402                 zero_counts[(int32_t) y[i]]=sum_zero_count;
01403         }
01404 
01405         biases[(int32_t) y[i]+1]-=sum_free;
01406         if (class_count[(int32_t) y[i]] != 0.0)
01407             rho+=sum_free/class_count[(int32_t) y[i]];
01408         outputs[i]+=sum_free+sum_atbound;
01409     }
01410 
01411     for (int32_t i=0; i<nr_class; i++)
01412     {
01413         if (class_count[i] == 0.0)
01414             rho+=zero_counts[i];
01415 
01416         normwcw[i]+=normwc_const/CMath::sq(nr_class);
01417         normwcw[i]=CMath::sqrt(normwcw[i]);
01418     }
01419 
01420     CMath::display_vector(normwcw, nr_class, "normwcw");
01421 
01422     rho/=nr_class;
01423 
01424     SG_SPRINT("rho=%f\n", rho);
01425 
01426     float64_t sumb=0;
01427     for (int32_t i=0; i<nr_class; i++)
01428     {
01429         if (class_count[i] != 0.0)
01430             biases[i+1]=biases[i+1]/class_count[i]+rho;
01431         else
01432             biases[i+1]+=rho-zero_counts[i];
01433 
01434         SG_SPRINT("biases=%f\n", biases[i+1]);
01435 
01436         sumb+=biases[i+1];
01437     }
01438     SG_SPRINT("sumb=%f\n", sumb);
01439 
01440     delete[] zero_counts;
01441 
01442     for (int32_t i=0; i<l; i++)
01443         outputs[i]+=biases[(int32_t) y[i]+1];
01444 
01445     biases[0]=rho;
01446 
01447     //CMath::display_vector(outputs, l, "outputs");
01448 
01449 
01450     float64_t xi=0;
01451     for (int32_t i=0; i<active_size; i++)
01452     {
01453         if (is_lower_bound(i))
01454             continue;
01455         xi+=rho-outputs[i];
01456     }
01457 
01458     //SG_SPRINT("xi=%f\n", xi);
01459 
01460     //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu);
01461 
01462     float64_t primal=0.5*quad- nr_class*rho+xi*mu;
01463 
01464     //SG_SPRINT("primal=%10.10f\n", primal);
01465 
01466     delete[] y;
01467     delete[] alpha;
01468     delete[] alpha_status;
01469 
01470     return primal;
01471 }
01472 
01473 
01474 // return 1 if already optimal, return 0 otherwise
01475 int32_t Solver_NUMC::select_working_set(
01476     int32_t &out_i, int32_t &out_j, float64_t &gap)
01477 {
01478     // return i,j such that y_i = y_j and
01479     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01480     // j: minimizes the decrease of obj value
01481     //    (if quadratic coefficient <= 0, replace it with tau)
01482     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01483 
01484     int32_t retval=0;
01485     float64_t best_gap=0;
01486     int32_t best_out_i=-1;
01487     int32_t best_out_j=-1;
01488 
01489     float64_t* Gmaxp = new float64_t[nr_class];
01490     float64_t* Gmaxp2 = new float64_t[nr_class];
01491     int32_t* Gmaxp_idx = new int32_t[nr_class];
01492 
01493     int32_t* Gmin_idx = new int32_t[nr_class];
01494     float64_t* obj_diff_min = new float64_t[nr_class];
01495 
01496     for (int32_t i=0; i<nr_class; i++)
01497     {
01498         Gmaxp[i]=-INF;
01499         Gmaxp2[i]=-INF;
01500         Gmaxp_idx[i]=-1;
01501         Gmin_idx[i]=-1;
01502         obj_diff_min[i]=INF;
01503     }
01504 
01505     for(int32_t t=0;t<active_size;t++)
01506     {
01507         int32_t cidx=y[t];
01508         if(!is_upper_bound(t))
01509         {
01510             if(-G[t] >= Gmaxp[cidx])
01511             {
01512                 Gmaxp[cidx] = -G[t];
01513                 Gmaxp_idx[cidx] = t;
01514             }
01515         }
01516     }
01517 
01518     for(int32_t j=0;j<active_size;j++)
01519     {
01520         int32_t cidx=y[j];
01521         int32_t ip = Gmaxp_idx[cidx];
01522         const Qfloat *Q_ip = NULL;
01523         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01524             Q_ip = Q->get_Q(ip,active_size);
01525 
01526         if (!is_lower_bound(j))
01527         {
01528             float64_t grad_diff=Gmaxp[cidx]+G[j];
01529             if (G[j] >= Gmaxp2[cidx])
01530                 Gmaxp2[cidx] = G[j];
01531             if (grad_diff > 0)
01532             {
01533                 float64_t obj_diff;
01534                 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01535                 if (quad_coef > 0)
01536                     obj_diff = -(grad_diff*grad_diff)/quad_coef;
01537                 else
01538                     obj_diff = -(grad_diff*grad_diff)/TAU;
01539 
01540                 if (obj_diff <= obj_diff_min[cidx])
01541                 {
01542                     Gmin_idx[cidx]=j;
01543                     obj_diff_min[cidx] = obj_diff;
01544                 }
01545             }
01546         }
01547 
01548         gap=Gmaxp[cidx]+Gmaxp2[cidx];
01549         if (gap>=best_gap && Gmin_idx[cidx]>=0 &&
01550                 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size)
01551         {
01552             out_i = Gmaxp_idx[cidx];
01553             out_j = Gmin_idx[cidx];
01554 
01555             best_gap=gap;
01556             best_out_i=out_i;
01557             best_out_j=out_j;
01558         }
01559     }
01560 
01561     gap=best_gap;
01562     out_i=best_out_i;
01563     out_j=best_out_j;
01564 
01565     SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]);
01566 
01567 
01568     if(gap < eps)
01569         retval=1;
01570 
01571     delete[] Gmaxp;
01572     delete[] Gmaxp2;
01573     delete[] Gmaxp_idx;
01574     delete[] Gmin_idx;
01575     delete[] obj_diff_min;
01576 
01577     return retval;
01578 }
01579 
// Shrinking is disabled for the multiclass nu solver: no example is ever
// marked as shrinkable.
bool Solver_NUMC::be_shrunk(
    int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
    float64_t Gmax4)
{
    return false;
}
01586 
// Intentionally a no-op: the multiclass nu solver never shrinks its
// active set (be_shrunk() always returns false).
void Solver_NUMC::do_shrinking()
{
}
01590 
// The multiclass biases are computed separately in compute_primal(), so
// the base solver's scalar rho is fixed at 0 here.
float64_t Solver_NUMC::calculate_rho()
{
    return 0;
}
01595 
01596 
01597 //
01598 // Q matrices for various formulations
01599 //
01600 class SVC_Q: public LibSVMKernel
01601 {
01602 public:
01603     SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01604     :LibSVMKernel(prob.l, prob.x, param)
01605     {
01606         clone(y,y_,prob.l);
01607         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01608         QD = new Qfloat[prob.l];
01609         for(int32_t i=0;i<prob.l;i++)
01610             QD[i]= (Qfloat)kernel_function(i,i);
01611     }
01612 
01613     Qfloat *get_Q(int32_t i, int32_t len) const
01614     {
01615         Qfloat *data;
01616         int32_t start;
01617         if((start = cache->get_data(i,&data,len)) < len)
01618         {
01619             for(int32_t j=start;j<len;j++)
01620                 data[j] = (Qfloat) y[i]*y[j]*kernel_function(i,j);
01621         }
01622         return data;
01623     }
01624 
01625     Qfloat *get_QD() const
01626     {
01627         return QD;
01628     }
01629 
01630     void swap_index(int32_t i, int32_t j) const
01631     {
01632         cache->swap_index(i,j);
01633         LibSVMKernel::swap_index(i,j);
01634         CMath::swap(y[i],y[j]);
01635         CMath::swap(QD[i],QD[j]);
01636     }
01637 
01638     ~SVC_Q()
01639     {
01640         delete[] y;
01641         delete cache;
01642         delete[] QD;
01643     }
01644 private:
01645     schar *y;
01646     Cache *cache;
01647     Qfloat *QD;
01648 };
01649 
01650 
01651 class ONE_CLASS_Q: public LibSVMKernel
01652 {
01653 public:
01654     ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01655     :LibSVMKernel(prob.l, prob.x, param)
01656     {
01657         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01658         QD = new Qfloat[prob.l];
01659         for(int32_t i=0;i<prob.l;i++)
01660             QD[i]= (Qfloat)kernel_function(i,i);
01661     }
01662 
01663     Qfloat *get_Q(int32_t i, int32_t len) const
01664     {
01665         Qfloat *data;
01666         int32_t start;
01667         if((start = cache->get_data(i,&data,len)) < len)
01668         {
01669             for(int32_t j=start;j<len;j++)
01670                 data[j] = (Qfloat) kernel_function(i,j);
01671         }
01672         return data;
01673     }
01674 
01675     Qfloat *get_QD() const
01676     {
01677         return QD;
01678     }
01679 
01680     void swap_index(int32_t i, int32_t j) const
01681     {
01682         cache->swap_index(i,j);
01683         LibSVMKernel::swap_index(i,j);
01684         CMath::swap(QD[i],QD[j]);
01685     }
01686 
01687     ~ONE_CLASS_Q()
01688     {
01689         delete cache;
01690         delete[] QD;
01691     }
01692 private:
01693     Cache *cache;
01694     Qfloat *QD;
01695 };
01696 
// Q matrix for regression: the 2l dual variables are the stacked positive
// and negative alphas of each example. index[] maps a dual variable back
// to its original example and sign[] carries +/-1, so that
// Q_ij = sign_i * sign_j * K(index_i, index_j).
class SVR_Q: public LibSVMKernel
{
public:
    SVR_Q(const svm_problem& prob, const svm_parameter& param)
    :LibSVMKernel(prob.l, prob.x, param)
    {
        l = prob.l;
        cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
        QD = new Qfloat[2*l];
        sign = new schar[2*l];
        index = new int32_t[2*l];
        for(int32_t k=0;k<l;k++)
        {
            // variable k is the positive alpha, k+l the negative one
            sign[k] = 1;
            sign[k+l] = -1;
            index[k] = k;
            index[k+l] = k;
            QD[k]= (Qfloat)kernel_function(k,k);
            QD[k+l]=QD[k];
        }
        // two reorder buffers, used alternately, so the result of the
        // previous get_Q() call stays valid during the next one
        buffer[0] = new Qfloat[2*l];
        buffer[1] = new Qfloat[2*l];
        next_buffer = 0;
    }

    // Only the bookkeeping arrays are permuted; the cache stays keyed by
    // the original example index.
    void swap_index(int32_t i, int32_t j) const
    {
        CMath::swap(sign[i],sign[j]);
        CMath::swap(index[i],index[j]);
        CMath::swap(QD[i],QD[j]);
    }

    // Kernel columns are cached per original example; the signed,
    // permuted view for dual variable i is materialized into the
    // alternating buffer.
    Qfloat *get_Q(int32_t i, int32_t len) const
    {
        Qfloat *data;
        int32_t real_i = index[i];
        if(cache->get_data(real_i,&data,l) < l)
        {
            for(int32_t j=0;j<l;j++)
                data[j] = (Qfloat)kernel_function(real_i,j);
        }

        // reorder and copy
        Qfloat *buf = buffer[next_buffer];
        next_buffer = 1 - next_buffer;
        schar si = sign[i];
        for(int32_t j=0;j<len;j++)
            buf[j] = si * sign[j] * data[index[j]];
        return buf;
    }

    Qfloat *get_QD() const
    {
        return QD;
    }

    ~SVR_Q()
    {
        delete cache;
        delete[] sign;
        delete[] index;
        delete[] buffer[0];
        delete[] buffer[1];
        delete[] QD;
    }

private:
    int32_t l;                   // number of original examples (2l dual vars)
    Cache *cache;                // column cache keyed by original index, owned
    schar *sign;                 // +/-1 per dual variable, owned
    int32_t *index;              // dual variable -> original example, owned
    mutable int32_t next_buffer; // which of the two buffers to fill next
    Qfloat *buffer[2];           // reorder buffers, owned
    Qfloat *QD;                  // diagonal of Q, owned
};
01772 
01773 //
01774 // construct and solve various formulations
01775 //
01776 static void solve_c_svc(
01777     const svm_problem *prob, const svm_parameter* param,
01778     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01779 {
01780     int32_t l = prob->l;
01781     schar *y = new schar[l];
01782 
01783     int32_t i;
01784 
01785     for(i=0;i<l;i++)
01786     {
01787         alpha[i] = 0;
01788         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01789     }
01790 
01791     Solver s;
01792     s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
01793         alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
01794 
01795     float64_t sum_alpha=0;
01796     for(i=0;i<l;i++)
01797         sum_alpha += alpha[i];
01798 
01799     if (Cp==Cn)
01800         SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
01801 
01802     for(i=0;i<l;i++)
01803         alpha[i] *= y[i];
01804 
01805     delete[] y;
01806 }
01807 
01808 
// C-SVC with two weighted datasets: per-example costs prob->C replace the
// global Cp/Cn via WeightedSolver::get_C()
01810 void solve_c_svc_weighted(
01811     const svm_problem *prob, const svm_parameter* param,
01812     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01813 {
01814     int l = prob->l;
01815     float64_t *minus_ones = new float64_t[l];
01816     schar *y = new schar[l];
01817 
01818     int i;
01819 
01820     for(i=0;i<l;i++)
01821     {
01822         alpha[i] = 0;
01823         minus_ones[i] = -1;
01824         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01825     }
01826 
01827     WeightedSolver s = WeightedSolver(prob->C);
01828     s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01829         alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
01830 
01831     float64_t sum_alpha=0;
01832     for(i=0;i<l;i++)
01833         sum_alpha += alpha[i];
01834 
01835     //if (Cp==Cn)
01836     //  SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l));
01837 
01838     for(i=0;i<l;i++)
01839         alpha[i] *= y[i];
01840 
01841     delete[] minus_ones;
01842     delete[] y;
01843 }
01844 
01845 static void solve_nu_svc(
01846     const svm_problem *prob, const svm_parameter *param,
01847     float64_t *alpha, Solver::SolutionInfo* si)
01848 {
01849     int32_t i;
01850     int32_t l = prob->l;
01851     float64_t nu = param->nu;
01852 
01853     schar *y = new schar[l];
01854 
01855     for(i=0;i<l;i++)
01856         if(prob->y[i]>0)
01857             y[i] = +1;
01858         else
01859             y[i] = -1;
01860 
01861     float64_t sum_pos = nu*l/2;
01862     float64_t sum_neg = nu*l/2;
01863 
01864     for(i=0;i<l;i++)
01865         if(y[i] == +1)
01866         {
01867             alpha[i] = CMath::min(1.0,sum_pos);
01868             sum_pos -= alpha[i];
01869         }
01870         else
01871         {
01872             alpha[i] = CMath::min(1.0,sum_neg);
01873             sum_neg -= alpha[i];
01874         }
01875 
01876     float64_t *zeros = new float64_t[l];
01877 
01878     for(i=0;i<l;i++)
01879         zeros[i] = 0;
01880 
01881     Solver_NU s;
01882     s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01883         alpha, 1.0, 1.0, param->eps, si,  param->shrinking, param->use_bias);
01884     float64_t r = si->r;
01885 
01886     SG_SINFO("C = %f\n",1/r);
01887 
01888     for(i=0;i<l;i++)
01889         alpha[i] *= y[i]/r;
01890 
01891     si->rho /= r;
01892     si->obj /= (r*r);
01893     si->upper_bound_p = 1/r;
01894     si->upper_bound_n = 1/r;
01895 
01896     delete[] y;
01897     delete[] zeros;
01898 }
01899 
01900 static void solve_nu_multiclass_svc(const svm_problem *prob,
01901         const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model)
01902 {
01903     int32_t l = prob->l;
01904     float64_t nu = param->nu;
01905 
01906     float64_t *alpha = Malloc(float64_t, prob->l);
01907     schar *y = new schar[l];
01908 
01909     for(int32_t i=0;i<l;i++)
01910     {
01911         alpha[i] = 0;
01912         y[i]=prob->y[i];
01913     }
01914 
01915     int32_t nr_class=param->nr_class;
01916     float64_t* sum_class = new float64_t[nr_class];
01917 
01918     for (int32_t j=0; j<nr_class; j++)
01919         sum_class[j] = nu*l/nr_class;
01920 
01921     for(int32_t i=0;i<l;i++)
01922     {
01923         alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]);
01924         sum_class[int32_t(y[i])] -= alpha[i];
01925     }
01926     delete[] sum_class;
01927 
01928 
01929     float64_t *zeros = new float64_t[l];
01930 
01931     for (int32_t i=0;i<l;i++)
01932         zeros[i] = 0;
01933 
01934     Solver_NUMC s(nr_class, nu);
01935     SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l));
01936 
01937     s.Solve(l, Q, zeros, y,
01938         alpha, 1.0, 1.0, param->eps, si,  param->shrinking, param->use_bias);
01939 
01940 
01941     int32_t* class_sv_count=new int32_t[nr_class];
01942 
01943     for (int32_t i=0; i<nr_class; i++)
01944         class_sv_count[i]=0;
01945 
01946     for (int32_t i=0; i<l; i++)
01947     {
01948         if (CMath::abs(alpha[i]) > 0)
01949             class_sv_count[(int32_t) y[i]]++;
01950     }
01951 
01952     model->l=l;
01953     // rho[0]= rho in mcsvm paper, rho[1]...rho[nr_class] is bias in mcsvm paper
01954     model->rho = Malloc(float64_t, nr_class+1);
01955     model->nr_class = nr_class;
01956     model->label = NULL;
01957     model->SV = Malloc(svm_node*,nr_class);
01958     model->nSV = Malloc(int32_t, nr_class);
01959     model->sv_coef = Malloc(float64_t *,nr_class);
01960     model->normwcw = Malloc(float64_t,nr_class);
01961 
01962     for (int32_t i=0; i<nr_class+1; i++)
01963         model->rho[i]=0;
01964 
01965     model->objective = si->obj;
01966 
01967     if (param->use_bias)
01968     {
01969         SG_SDEBUG("Computing biases and primal objective\n");
01970         float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw);
01971         SG_SINFO("Primal = %10.10f\n", primal);
01972     }
01973 
01974     for (int32_t i=0; i<nr_class; i++)
01975     {
01976         model->nSV[i]=class_sv_count[i];
01977         model->SV[i] = Malloc(svm_node,class_sv_count[i]);
01978         model->sv_coef[i] = Malloc(float64_t,class_sv_count[i]);
01979         class_sv_count[i]=0;
01980     }
01981 
01982     for (int32_t i=0;i<prob->l;i++)
01983     {
01984         if(fabs(alpha[i]) > 0)
01985         {
01986             model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index;
01987             model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i];
01988             class_sv_count[(int32_t) y[i]]++;
01989         }
01990     }
01991 
01992     delete[] y;
01993     delete[] zeros;
01994     free(alpha);
01995 }
01996 
01997 static void solve_one_class(
01998     const svm_problem *prob, const svm_parameter *param,
01999     float64_t *alpha, Solver::SolutionInfo* si)
02000 {
02001     int32_t l = prob->l;
02002     float64_t *zeros = new float64_t[l];
02003     schar *ones = new schar[l];
02004     int32_t i;
02005 
02006     int32_t n = (int32_t)(param->nu*prob->l);   // # of alpha's at upper bound
02007 
02008     for(i=0;i<n;i++)
02009         alpha[i] = 1;
02010     if(n<prob->l)
02011         alpha[n] = param->nu * prob->l - n;
02012     for(i=n+1;i<l;i++)
02013         alpha[i] = 0;
02014 
02015     for(i=0;i<l;i++)
02016     {
02017         zeros[i] = 0;
02018         ones[i] = 1;
02019     }
02020 
02021     Solver s;
02022     s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
02023         alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
02024 
02025     delete[] zeros;
02026     delete[] ones;
02027 }
02028 
02029 static void solve_epsilon_svr(
02030     const svm_problem *prob, const svm_parameter *param,
02031     float64_t *alpha, Solver::SolutionInfo* si)
02032 {
02033     int32_t l = prob->l;
02034     float64_t *alpha2 = new float64_t[2*l];
02035     float64_t *linear_term = new float64_t[2*l];
02036     schar *y = new schar[2*l];
02037     int32_t i;
02038 
02039     for(i=0;i<l;i++)
02040     {
02041         alpha2[i] = 0;
02042         linear_term[i] = param->p - prob->y[i];
02043         y[i] = 1;
02044 
02045         alpha2[i+l] = 0;
02046         linear_term[i+l] = param->p + prob->y[i];
02047         y[i+l] = -1;
02048     }
02049 
02050     Solver s;
02051     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02052         alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias);
02053 
02054     float64_t sum_alpha = 0;
02055     for(i=0;i<l;i++)
02056     {
02057         alpha[i] = alpha2[i] - alpha2[i+l];
02058         sum_alpha += fabs(alpha[i]);
02059     }
02060     SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
02061 
02062     delete[] alpha2;
02063     delete[] linear_term;
02064     delete[] y;
02065 }
02066 
02067 static void solve_nu_svr(
02068     const svm_problem *prob, const svm_parameter *param,
02069     float64_t *alpha, Solver::SolutionInfo* si)
02070 {
02071     int32_t l = prob->l;
02072     float64_t C = param->C;
02073     float64_t *alpha2 = new float64_t[2*l];
02074     float64_t *linear_term = new float64_t[2*l];
02075     schar *y = new schar[2*l];
02076     int32_t i;
02077 
02078     float64_t sum = C * param->nu * l / 2;
02079     for(i=0;i<l;i++)
02080     {
02081         alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
02082         sum -= alpha2[i];
02083 
02084         linear_term[i] = - prob->y[i];
02085         y[i] = 1;
02086 
02087         linear_term[i+l] = prob->y[i];
02088         y[i+l] = -1;
02089     }
02090 
02091     Solver_NU s;
02092     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02093         alpha2, C, C, param->eps, si, param->shrinking, param->use_bias);
02094 
02095     SG_SINFO("epsilon = %f\n",-si->r);
02096 
02097     for(i=0;i<l;i++)
02098         alpha[i] = alpha2[i] - alpha2[i+l];
02099 
02100     delete[] alpha2;
02101     delete[] linear_term;
02102     delete[] y;
02103 }
02104 
02105 //
02106 // decision_function
02107 //
// Result of training one sub-problem (filled by svm_train_one):
//  - alpha: one coefficient per training point, Malloc-allocated;
//    ownership passes to the caller, released with free()
//  - rho: bias term of the resulting decision function
//  - objective: final value of the dual objective
struct decision_function
{
    float64_t *alpha;
    float64_t rho;
    float64_t objective;
};
02114 
02115 decision_function svm_train_one(
02116     const svm_problem *prob, const svm_parameter *param,
02117     float64_t Cp, float64_t Cn)
02118 {
02119     float64_t *alpha = Malloc(float64_t, prob->l);
02120     Solver::SolutionInfo si;
02121     switch(param->svm_type)
02122     {
02123         case C_SVC:
02124             solve_c_svc(prob,param,alpha,&si,Cp,Cn);
02125             break;
02126         case NU_SVC:
02127             solve_nu_svc(prob,param,alpha,&si);
02128             break;
02129         case ONE_CLASS:
02130             solve_one_class(prob,param,alpha,&si);
02131             break;
02132         case EPSILON_SVR:
02133             solve_epsilon_svr(prob,param,alpha,&si);
02134             break;
02135         case NU_SVR:
02136             solve_nu_svr(prob,param,alpha,&si);
02137             break;
02138     }
02139 
02140     SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
02141 
02142     // output SVs
02143     if (param->svm_type != ONE_CLASS)
02144     {
02145         int32_t nSV = 0;
02146         int32_t nBSV = 0;
02147         for(int32_t i=0;i<prob->l;i++)
02148         {
02149             if(fabs(alpha[i]) > 0)
02150             {
02151                 ++nSV;
02152                 if(prob->y[i] > 0)
02153                 {
02154                     if(fabs(alpha[i]) >= si.upper_bound_p)
02155                         ++nBSV;
02156                 }
02157                 else
02158                 {
02159                     if(fabs(alpha[i]) >= si.upper_bound_n)
02160                         ++nBSV;
02161                 }
02162             }
02163         }
02164         SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
02165     }
02166 
02167     decision_function f;
02168     f.alpha = alpha;
02169     f.rho = si.rho;
02170     f.objective=si.obj;
02171     return f;
02172 }
02173 
02174 //
02175 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02176 // perm, length l, must be allocated before calling this subroutine
02177 void svm_group_classes(
02178     const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
02179     int32_t **start_ret, int32_t **count_ret, int32_t *perm)
02180 {
02181     int32_t l = prob->l;
02182     int32_t max_nr_class = 16;
02183     int32_t nr_class = 0;
02184     int32_t *label = Malloc(int32_t, max_nr_class);
02185     int32_t *count = Malloc(int32_t, max_nr_class);
02186     int32_t *data_label = Malloc(int32_t, l);
02187     int32_t i;
02188 
02189     for(i=0;i<l;i++)
02190     {
02191         int32_t this_label=(int32_t) prob->y[i];
02192         int32_t j;
02193         for(j=0;j<nr_class;j++)
02194         {
02195             if(this_label == label[j])
02196             {
02197                 ++count[j];
02198                 break;
02199             }
02200         }
02201         data_label[i] = j;
02202         if(j == nr_class)
02203         {
02204             if(nr_class == max_nr_class)
02205             {
02206                 max_nr_class *= 2;
02207                 label=(int32_t *) realloc(label,max_nr_class*sizeof(int32_t));
02208                 count=(int32_t *) realloc(count,max_nr_class*sizeof(int32_t));
02209             }
02210             label[nr_class] = this_label;
02211             count[nr_class] = 1;
02212             ++nr_class;
02213         }
02214     }
02215 
02216     int32_t *start = Malloc(int32_t, nr_class);
02217     start[0] = 0;
02218     for(i=1;i<nr_class;i++)
02219         start[i] = start[i-1]+count[i-1];
02220     for(i=0;i<l;i++)
02221     {
02222         perm[start[data_label[i]]] = i;
02223         ++start[data_label[i]];
02224     }
02225     start[0] = 0;
02226     for(i=1;i<nr_class;i++)
02227         start[i] = start[i-1]+count[i-1];
02228 
02229     *nr_class_ret = nr_class;
02230     *label_ret = label;
02231     *start_ret = start;
02232     *count_ret = count;
02233     free(data_label);
02234 }
02235 
02236 //
02237 // Interface functions
02238 //
// Train an SVM model on `prob` with the settings in `param`.
//
// Three regimes, selected by param->svm_type:
//  - ONE_CLASS / EPSILON_SVR / NU_SVR: one call to svm_train_one; the
//    model stores a single coefficient row and a single rho.
//  - NU_MULTICLASS_SVC: solve_nu_multiclass_svc fills the model itself.
//  - otherwise (C_SVC / NU_SVC): one-vs-one multiclass — train
//    nr_class*(nr_class-1)/2 binary sub-problems and merge their
//    support vectors.
//
// The returned model is Malloc'ed; free_sv stays 0, so model->SV
// borrows the caller's prob->x nodes — prob must outlive the model.
svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
{
    svm_model *model = Malloc(svm_model,1);
    model->param = *param;
    model->free_sv = 0; // XXX: model borrows prob->x (see svm_destroy_model)

    if(param->svm_type == ONE_CLASS ||
       param->svm_type == EPSILON_SVR ||
       param->svm_type == NU_SVR)
    {
        SG_SINFO("training one class svm or doing epsilon sv regression\n");

        // regression or one-class-svm: a single problem, one coefficient
        // row, one rho
        model->nr_class = 2;
        model->label = NULL;
        model->nSV = NULL;
        model->sv_coef = Malloc(float64_t *,1);
        decision_function f = svm_train_one(prob,param,0,0);
        model->rho = Malloc(float64_t, 1);
        model->rho[0] = f.rho;
        model->objective = f.objective;

        // keep only the points with non-zero alpha (the support vectors)
        int32_t nSV = 0;
        int32_t i;
        for(i=0;i<prob->l;i++)
            if(fabs(f.alpha[i]) > 0) ++nSV;
        model->l = nSV;
        model->SV = Malloc(svm_node *,nSV);
        model->sv_coef[0] = Malloc(float64_t, nSV);
        int32_t j = 0;
        for(i=0;i<prob->l;i++)
            if(fabs(f.alpha[i]) > 0)
            {
                model->SV[j] = prob->x[i];
                model->sv_coef[0][j] = f.alpha[i];
                ++j;
            }

        free(f.alpha);
    }
    else if(param->svm_type == NU_MULTICLASS_SVC)
    {
        // the multiclass solver allocates and fills the model on its own
        Solver::SolutionInfo si;
        solve_nu_multiclass_svc(prob,param,&si,model);
        SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
    }
    else
    {
        // classification: one-vs-one decomposition
        int32_t l = prob->l;
        int32_t nr_class;
        int32_t *label = NULL;
        int32_t *start = NULL;
        int32_t *count = NULL;
        int32_t *perm = Malloc(int32_t, l);

        // group training data of the same class so that class i occupies
        // the contiguous permuted range [start[i], start[i]+count[i])
        svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
        // x/C/pv are the permuted views of the training data
        svm_node **x = Malloc(svm_node *,l);
        float64_t *C = Malloc(float64_t,l);
        float64_t *pv = Malloc(float64_t,l);


        int32_t i;
        for(i=0;i<l;i++) {
            x[i] = prob->x[perm[i]];
            C[i] = prob->C[perm[i]];

            if (prob->pv)
            {
                pv[i] = prob->pv[perm[i]];
            }
            else
            {
                //no custom linear term is set; -1 is the default entry
                pv[i] = -1.0;
            }

        }


        // calculate weighted C: per-class weights from param scale the
        // base regularization constant for the matching class
        float64_t *weighted_C = Malloc(float64_t, nr_class);
        for(i=0;i<nr_class;i++)
            weighted_C[i] = param->C;
        for(i=0;i<param->nr_weight;i++)
        {
            int32_t j;
            for(j=0;j<nr_class;j++)
                if(param->weight_label[i] == label[j])
                    break;
            if(j == nr_class)
                SG_SWARNING("warning: class label %d specified in weight is not found\n", param->weight_label[i]);
            else
                weighted_C[j] *= param->weight[i];
        }

        // train k*(k-1)/2 models
        // nonzero[i] marks permuted point i as a support vector in at
        // least one sub-problem
        bool *nonzero = Malloc(bool,l);
        for(i=0;i<l;i++)
            nonzero[i] = false;
        decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);

        int32_t p = 0;
        for(i=0;i<nr_class;i++)
            for(int32_t j=i+1;j<nr_class;j++)
            {
                // binary sub-problem: class i labeled +1, class j labeled -1
                svm_problem sub_prob;
                int32_t si = start[i], sj = start[j];
                int32_t ci = count[i], cj = count[j];
                sub_prob.l = ci+cj;
                sub_prob.x = Malloc(svm_node *,sub_prob.l);
                sub_prob.y = Malloc(float64_t,sub_prob.l+1); //dirty hack to surpress valgrind err
                sub_prob.C = Malloc(float64_t,sub_prob.l+1);
                sub_prob.pv = Malloc(float64_t,sub_prob.l+1);

                int32_t k;
                for(k=0;k<ci;k++)
                {
                    sub_prob.x[k] = x[si+k];
                    sub_prob.y[k] = +1;
                    sub_prob.C[k] = C[si+k];
                    sub_prob.pv[k] = pv[si+k];

                }
                for(k=0;k<cj;k++)
                {
                    sub_prob.x[ci+k] = x[sj+k];
                    sub_prob.y[ci+k] = -1;
                    sub_prob.C[ci+k] = C[sj+k];
                    sub_prob.pv[ci+k] = pv[sj+k];
                }
                sub_prob.y[sub_prob.l]=-1; //dirty hack to surpress valgrind err
                sub_prob.C[sub_prob.l]=-1;
                sub_prob.pv[sub_prob.l]=-1;

                f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
                // remember which points came out as support vectors
                for(k=0;k<ci;k++)
                    if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
                        nonzero[si+k] = true;
                for(k=0;k<cj;k++)
                    if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
                        nonzero[sj+k] = true;
                free(sub_prob.x);
                free(sub_prob.y);
                free(sub_prob.C);
                free(sub_prob.pv);
                ++p;
            }

        // build output
        // NOTE(review): only f[0].objective is kept — the objectives of
        // the remaining sub-problems are discarded; confirm callers
        // expect this
        model->objective = f[0].objective;
        model->nr_class = nr_class;

        model->label = Malloc(int32_t, nr_class);
        for(i=0;i<nr_class;i++)
            model->label[i] = label[i];

        model->rho = Malloc(float64_t, nr_class*(nr_class-1)/2);
        for(i=0;i<nr_class*(nr_class-1)/2;i++)
            model->rho[i] = f[i].rho;

        // count support vectors per class (a point counts once even if
        // it is a support vector in several sub-problems)
        int32_t total_sv = 0;
        int32_t *nz_count = Malloc(int32_t, nr_class);
        model->nSV = Malloc(int32_t, nr_class);
        for(i=0;i<nr_class;i++)
        {
            int32_t nSV = 0;
            for(int32_t j=0;j<count[i];j++)
                if(nonzero[start[i]+j])
                {
                    ++nSV;
                    ++total_sv;
                }
            model->nSV[i] = nSV;
            nz_count[i] = nSV;
        }

        SG_SINFO("Total nSV = %d\n",total_sv);

        model->l = total_sv;
        model->SV = Malloc(svm_node *,total_sv);
        p = 0;
        for(i=0;i<l;i++)
            if(nonzero[i]) model->SV[p++] = x[i];

        // nz_start[i]: offset of class i's support vectors in model->SV
        int32_t *nz_start = Malloc(int32_t, nr_class);
        nz_start[0] = 0;
        for(i=1;i<nr_class;i++)
            nz_start[i] = nz_start[i-1]+nz_count[i-1];

        model->sv_coef = Malloc(float64_t *,nr_class-1);
        for(i=0;i<nr_class-1;i++)
            model->sv_coef[i] = Malloc(float64_t, total_sv);

        p = 0;
        for(i=0;i<nr_class;i++)
            for(int32_t j=i+1;j<nr_class;j++)
            {
                // classifier (i,j): coefficients with
                // i are in sv_coef[j-1][nz_start[i]...],
                // j are in sv_coef[i][nz_start[j]...]

                int32_t si = start[i];
                int32_t sj = start[j];
                int32_t ci = count[i];
                int32_t cj = count[j];

                int32_t q = nz_start[i];
                int32_t k;
                for(k=0;k<ci;k++)
                    if(nonzero[si+k])
                        model->sv_coef[j-1][q++] = f[p].alpha[k];
                q = nz_start[j];
                for(k=0;k<cj;k++)
                    if(nonzero[sj+k])
                        model->sv_coef[i][q++] = f[p].alpha[ci+k];
                ++p;
            }

        // release all per-training scratch buffers
        free(label);
        free(count);
        free(perm);
        free(start);
        free(x);
        free(C);
        free(pv);
        free(weighted_C);
        free(nonzero);
        for(i=0;i<nr_class*(nr_class-1)/2;i++)
            free(f[i].alpha);
        free(f);
        free(nz_count);
        free(nz_start);
    }
    return model;
}
02478 
02479 void svm_destroy_model(svm_model* model)
02480 {
02481     if(model->free_sv && model->l > 0)
02482         free((void *)(model->SV[0]));
02483     for(int32_t i=0;i<model->nr_class-1;i++)
02484         free(model->sv_coef[i]);
02485     free(model->SV);
02486     free(model->sv_coef);
02487     free(model->rho);
02488     free(model->label);
02489     free(model->nSV);
02490     free(model);
02491 }
02492 
02493 void svm_destroy_param(svm_parameter* param)
02494 {
02495     free(param->weight_label);
02496     free(param->weight);
02497 }
02498 
// Validate `param` (and, for NU_SVC, its feasibility on `prob`).
// Returns NULL when everything is consistent, otherwise a static
// string describing the first problem found.
const char *svm_check_parameter(
    const svm_problem *prob, const svm_parameter *param)
{
    // svm_type

    int32_t svm_type = param->svm_type;
    if(svm_type != C_SVC &&
       svm_type != NU_SVC &&
       svm_type != ONE_CLASS &&
       svm_type != EPSILON_SVR &&
       svm_type != NU_SVR &&
       svm_type != NU_MULTICLASS_SVC)
        return "unknown svm type";

    // kernel_type, degree

    int32_t kernel_type = param->kernel_type;
    if(kernel_type != LINEAR &&
       kernel_type != POLY &&
       kernel_type != RBF &&
       kernel_type != SIGMOID &&
       kernel_type != PRECOMPUTED)
        return "unknown kernel type";

    if(param->degree < 0)
        return "degree of polynomial kernel < 0";

    // cache_size,eps,C,nu,p,shrinking

    if(param->cache_size <= 0)
        return "cache_size <= 0";

    if(param->eps <= 0)
        return "eps <= 0";

    // C only applies to the C-parameterized formulations
    if(svm_type == C_SVC ||
       svm_type == EPSILON_SVR ||
       svm_type == NU_SVR)
        if(param->C <= 0)
            return "C <= 0";

    // nu must lie in (0, 1] for the nu-parameterized formulations
    if(svm_type == NU_SVC ||
       svm_type == ONE_CLASS ||
       svm_type == NU_SVR)
        if(param->nu <= 0 || param->nu > 1)
            return "nu <= 0 or nu > 1";

    if(svm_type == EPSILON_SVR)
        if(param->p < 0)
            return "p < 0";

    if(param->shrinking != 0 &&
       param->shrinking != 1)
        return "shrinking != 0 and shrinking != 1";


    // check whether nu-svc is feasible: for every pair of classes with
    // n1 and n2 examples, nu*(n1+n2)/2 must not exceed min(n1,n2)

    if(svm_type == NU_SVC)
    {
        // count examples per distinct label (same discovery loop as
        // svm_group_classes, but without building the permutation)
        int32_t l = prob->l;
        int32_t max_nr_class = 16;
        int32_t nr_class = 0;
        int32_t *label = Malloc(int32_t, max_nr_class);
        int32_t *count = Malloc(int32_t, max_nr_class);

        int32_t i;
        for(i=0;i<l;i++)
        {
            int32_t this_label = (int32_t) prob->y[i];
            int32_t j;
            for(j=0;j<nr_class;j++)
                if(this_label == label[j])
                {
                    ++count[j];
                    break;
                }
            if(j == nr_class)
            {
                // new label: grow the tables when full
                // NOTE(review): realloc return values are not checked
                if(nr_class == max_nr_class)
                {
                    max_nr_class *= 2;
                    label=(int32_t *) realloc(label,
                        max_nr_class*sizeof(int32_t));
                    count=(int32_t *) realloc(count,
                        max_nr_class*sizeof(int32_t));
                }
                label[nr_class] = this_label;
                count[nr_class] = 1;
                ++nr_class;
            }
        }

        for(i=0;i<nr_class;i++)
        {
            int32_t n1 = count[i];
            for(int32_t j=i+1;j<nr_class;j++)
            {
                int32_t n2 = count[j];
                if(param->nu*(n1+n2)/2 > CMath::min(n1,n2))
                {
                    free(label);
                    free(count);
                    return "specified nu is infeasible";
                }
            }
        }
        free(label);
        free(count);
    }

    return NULL;
}
02612 }
02613 #endif // DOXYGEN_SHOULD_SKIP_THIS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation