SVM_libsvm.cpp

00001 /*
00002  * Copyright (c) 2000-2009 Chih-Chung Chang and Chih-Jen Lin
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * 1. Redistributions of source code must retain the above copyright
00010  * notice, this list of conditions and the following disclaimer.
00011  *
00012  * 2. Redistributions in binary form must reproduce the above copyright
00013  * notice, this list of conditions and the following disclaimer in the
00014  * documentation and/or other materials provided with the distribution.
00015  *
00016  * 3. Neither name of copyright holders nor the names of its contributors
00017  * may be used to endorse or promote products derived from this software
00018  * without specific prior written permission.
00019  *
00020  *
00021  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032  *
00033  * Shogun specific adjustments (w) 2006-2009 Soeren Sonnenburg
00034  */
00035 
00036 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00037 
00038 #include <shogun/lib/memory.h>
00039 #include <shogun/classifier/svm/SVM_libsvm.h>
00040 #include <shogun/kernel/Kernel.h>
00041 #include <shogun/io/SGIO.h>
00042 #include <shogun/lib/Time.h>
00043 #include <shogun/lib/Signal.h>
00044 #include <shogun/lib/common.h>
00045 #include <shogun/mathematics/Math.h>
00046 
00047 #include <stdio.h>
00048 #include <stdlib.h>
00049 #include <ctype.h>
00050 #include <float.h>
00051 #include <string.h>
00052 #include <stdarg.h>
00053 
00054 namespace shogun
00055 {
00056 
00057 typedef KERNELCACHE_ELEM Qfloat;
00058 typedef float64_t schar;
00059 
00060 template <class S, class T> inline void clone(T*& dst, S* src, int32_t n)
00061 {
00062     dst = SG_MALLOC(T, n);
00063     memcpy((void *)dst,(void *)src,sizeof(T)*n);
00064 }
00065 #define INF HUGE_VAL
00066 #define TAU 1e-12
00067 
00068 class QMatrix;
00069 class SVC_QMC;
00070 
00071 //
00072 // Kernel Cache
00073 //
00074 // l is the total number of data items
00075 // size is the cache size limit in bytes
00076 //
00077 class Cache
00078 {
00079 public:
00080     Cache(int32_t l, int64_t size);
00081     ~Cache();
00082 
00083     // request data [0,len)
00084     // return some position p where [p,len) need to be filled
00085     // (p >= len if nothing needs to be filled)
00086     int32_t get_data(const int32_t index, Qfloat **data, int32_t len);
00087     void swap_index(int32_t i, int32_t j);  // future_option
00088 
00089 private:
00090     int32_t l;
00091     int64_t size;
00092     struct head_t
00093     {
00094         head_t *prev, *next;    // a circular list
00095         Qfloat *data;
00096         int32_t len;        // data[0,len) is cached in this entry
00097     };
00098 
00099     head_t *head;
00100     head_t lru_head;
00101     void lru_delete(head_t *h);
00102     void lru_insert(head_t *h);
00103 };
00104 
00105 Cache::Cache(int32_t l_, int64_t size_):l(l_),size(size_)
00106 {
00107     head = (head_t *)calloc(l,sizeof(head_t));  // initialized to 0
00108     size /= sizeof(Qfloat);
00109     size -= l * sizeof(head_t) / sizeof(Qfloat);
00110     size = CMath::max(size, (int64_t) 2*l); // cache must be large enough for two columns
00111     lru_head.next = lru_head.prev = &lru_head;
00112 }
00113 
00114 Cache::~Cache()
00115 {
00116     for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00117         SG_FREE(h->data);
00118     SG_FREE(head);
00119 }
00120 
00121 void Cache::lru_delete(head_t *h)
00122 {
00123     // delete from current location
00124     h->prev->next = h->next;
00125     h->next->prev = h->prev;
00126 }
00127 
00128 void Cache::lru_insert(head_t *h)
00129 {
00130     // insert to last position
00131     h->next = &lru_head;
00132     h->prev = lru_head.prev;
00133     h->prev->next = h;
00134     h->next->prev = h;
00135 }
00136 
00137 int32_t Cache::get_data(const int32_t index, Qfloat **data, int32_t len)
00138 {
00139     head_t *h = &head[index];
00140     if(h->len) lru_delete(h);
00141     int32_t more = len - h->len;
00142 
00143     if(more > 0)
00144     {
00145         // free old space
00146         while(size < more)
00147         {
00148             head_t *old = lru_head.next;
00149             lru_delete(old);
00150             SG_FREE(old->data);
00151             size += old->len;
00152             old->data = 0;
00153             old->len = 0;
00154         }
00155 
00156         // allocate new space
00157         h->data = SG_REALLOC(Qfloat, h->data, len);
00158         size -= more;
00159         CMath::swap(h->len,len);
00160     }
00161 
00162     lru_insert(h);
00163     *data = h->data;
00164     return len;
00165 }
00166 
00167 void Cache::swap_index(int32_t i, int32_t j)
00168 {
00169     if(i==j) return;
00170 
00171     if(head[i].len) lru_delete(&head[i]);
00172     if(head[j].len) lru_delete(&head[j]);
00173     CMath::swap(head[i].data,head[j].data);
00174     CMath::swap(head[i].len,head[j].len);
00175     if(head[i].len) lru_insert(&head[i]);
00176     if(head[j].len) lru_insert(&head[j]);
00177 
00178     if(i>j) CMath::swap(i,j);
00179     for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00180     {
00181         if(h->len > i)
00182         {
00183             if(h->len > j)
00184                 CMath::swap(h->data[i],h->data[j]);
00185             else
00186             {
00187                 // give up
00188                 lru_delete(h);
00189                 SG_FREE(h->data);
00190                 size += h->len;
00191                 h->data = 0;
00192                 h->len = 0;
00193             }
00194         }
00195     }
00196 }
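
// Illustrative sketch of the Cache::get_data() contract used by the Q-matrix
// classes further below; fill_column_example and compute_entry are
// hypothetical names, not part of this file:
#if 0
void fill_column_example(Cache* cache, int32_t i, int32_t len)
{
    Qfloat* data;
    int32_t start = cache->get_data(i, &data, len);
    // entries [0,start) were already cached; only [start,len) must be computed
    for (int32_t j = start; j < len; j++)
        data[j] = compute_entry(i, j); // hypothetical kernel evaluation
}
#endif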
00197 
00198 //
00199 // Kernel evaluation
00200 //
00201 // the member function kernel_function does a single kernel evaluation
00202 // the constructor of LibSVMKernel prepares to calculate the l*l kernel matrix
00203 // the member function get_Q is for getting one column from the Q Matrix
00204 //
00205 class QMatrix {
00206 public:
00207     virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
00208     virtual Qfloat *get_QD() const = 0;
00209     virtual void swap_index(int32_t i, int32_t j) const = 0;
00210     virtual ~QMatrix() {}
00211 
00212     float64_t max_train_time;
00213 };
00214 
00215 class LibSVMKernel: public QMatrix {
00216 public:
00217     LibSVMKernel(int32_t l, svm_node * const * x, const svm_parameter& param);
00218     virtual ~LibSVMKernel();
00219 
00220     virtual Qfloat *get_Q(int32_t column, int32_t len) const = 0;
00221     virtual Qfloat *get_QD() const = 0;
00222     virtual void swap_index(int32_t i, int32_t j) const // not really const...
00223     {
00224         CMath::swap(x[i],x[j]);
00225         if(x_square) CMath::swap(x_square[i],x_square[j]);
00226     }
00227 
00228     inline float64_t kernel_function(int32_t i, int32_t j) const
00229     {
00230         return kernel->kernel(x[i]->index,x[j]->index);
00231     }
00232 
00233 private:
00234     CKernel* kernel;
00235     const svm_node **x;
00236     float64_t *x_square;
00237 
00238     // svm_parameter
00239     const int32_t kernel_type;
00240     const int32_t degree;
00241     const float64_t gamma;
00242     const float64_t coef0;
00243 };
00244 
00245 LibSVMKernel::LibSVMKernel(int32_t l, svm_node * const * x_, const svm_parameter& param)
00246 : kernel_type(param.kernel_type), degree(param.degree),
00247  gamma(param.gamma), coef0(param.coef0)
00248 {
00249     clone(x,x_,l);
00250     x_square = 0;
00251     kernel=param.kernel;
00252     max_train_time=param.max_train_time;
00253 }
00254 
00255 LibSVMKernel::~LibSVMKernel()
00256 {
00257     SG_FREE(x);
00258     SG_FREE(x_square);
00259 }
00260 
00261 // Generalized SMO+SVMlight algorithm
00262 // Solves:
00263 //
00264 //  min 0.5(\alpha^T Q \alpha) + p^T \alpha
00265 //
00266 //      y^T \alpha = \delta
00267 //      y_i = +1 or -1
00268 //      0 <= alpha_i <= Cp for y_i = 1
00269 //      0 <= alpha_i <= Cn for y_i = -1
00270 //
00271 // Given:
00272 //
00273 //  Q, p, y, Cp, Cn, and an initial feasible point \alpha
00274 //  l is the size of vectors and matrices
00275 //  eps is the stopping tolerance
00276 //
00277 // solution will be put in \alpha, objective value will be put in obj
00278 //
00279 class Solver {
00280 public:
00281     Solver() {};
00282     virtual ~Solver() {};
00283 
00284     struct SolutionInfo {
00285         float64_t obj;
00286         float64_t rho;
00287         float64_t upper_bound_p;
00288         float64_t upper_bound_n;
00289         float64_t r;    // for Solver_NU
00290     };
00291 
00292     void Solve(
00293         int32_t l, const QMatrix& Q, const float64_t *p_, const schar *y_,
00294         float64_t *alpha_, float64_t Cp, float64_t Cn, float64_t eps,
00295         SolutionInfo* si, int32_t shrinking, bool use_bias);
00296 
00297 protected:
00298     int32_t active_size;
00299     schar *y;
00300     float64_t *G;       // gradient of objective function
00301     enum { LOWER_BOUND, UPPER_BOUND, FREE };
00302     char *alpha_status; // LOWER_BOUND, UPPER_BOUND, FREE
00303     float64_t *alpha;
00304     const QMatrix *Q;
00305     const Qfloat *QD;
00306     float64_t eps;
00307     float64_t Cp,Cn;
00308     float64_t *p;
00309     int32_t *active_set;
00310     float64_t *G_bar;       // gradient, if we treat free variables as 0
00311     int32_t l;
00312     bool unshrink;  // true once the active set has been unshrunk near optimality (gap <= 10*eps)
00313 
00314     virtual float64_t get_C(int32_t i) // virtual so WeightedSolver's per-example costs take effect
00315     {
00316         return (y[i] > 0)? Cp : Cn;
00317     }
00318     void update_alpha_status(int32_t i)
00319     {
00320         if(alpha[i] >= get_C(i))
00321             alpha_status[i] = UPPER_BOUND;
00322         else if(alpha[i] <= 0)
00323             alpha_status[i] = LOWER_BOUND;
00324         else alpha_status[i] = FREE;
00325     }
00326     bool is_upper_bound(int32_t i) { return alpha_status[i] == UPPER_BOUND; }
00327     bool is_lower_bound(int32_t i) { return alpha_status[i] == LOWER_BOUND; }
00328     bool is_free(int32_t i) { return alpha_status[i] == FREE; }
00329     void swap_index(int32_t i, int32_t j);
00330     void reconstruct_gradient();
00331     virtual int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
00332     virtual float64_t calculate_rho();
00333     virtual void do_shrinking();
00334 private:
00335     bool be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2);
00336 };
00337 
00338 void Solver::swap_index(int32_t i, int32_t j)
00339 {
00340     Q->swap_index(i,j);
00341     CMath::swap(y[i],y[j]);
00342     CMath::swap(G[i],G[j]);
00343     CMath::swap(alpha_status[i],alpha_status[j]);
00344     CMath::swap(alpha[i],alpha[j]);
00345     CMath::swap(p[i],p[j]);
00346     CMath::swap(active_set[i],active_set[j]);
00347     CMath::swap(G_bar[i],G_bar[j]);
00348 }
00349 
00350 void Solver::reconstruct_gradient()
00351 {
00352     // reconstruct inactive elements of G from G_bar and free variables
00353 
00354     if(active_size == l) return;
00355 
00356     int32_t i,j;
00357     int32_t nr_free = 0;
00358 
00359     for(j=active_size;j<l;j++)
00360         G[j] = G_bar[j] + p[j];
00361 
00362     for(j=0;j<active_size;j++)
00363         if(is_free(j))
00364             nr_free++;
00365 
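    // Both branches below compute the same missing gradient entries, just in a
    // different loop order: the first fetches one Q column of length
    // active_size per inactive variable, the second one full Q column of
    // length l per free active variable. The test picks the first variant only
    // when the second would touch more than twice as many Q entries.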
00366     if (nr_free*l > 2*active_size*(l-active_size))
00367     {
00368         for(i=active_size;i<l;i++)
00369         {
00370             const Qfloat *Q_i = Q->get_Q(i,active_size);
00371             for(j=0;j<active_size;j++)
00372                 if(is_free(j))
00373                     G[i] += alpha[j] * Q_i[j];
00374         }
00375     }
00376     else
00377     {
00378         for(i=0;i<active_size;i++)
00379             if(is_free(i))
00380             {
00381                 const Qfloat *Q_i = Q->get_Q(i,l);
00382                 float64_t alpha_i = alpha[i];
00383                 for(j=active_size;j<l;j++)
00384                     G[j] += alpha_i * Q_i[j];
00385             }
00386     }
00387 }
00388 
00389 void Solver::Solve(
00390     int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
00391     const schar *p_y, float64_t *p_alpha, float64_t p_Cp, float64_t p_Cn,
00392     float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
00393 {
00394     this->l = p_l;
00395     this->Q = &p_Q;
00396     QD=Q->get_QD();
00397     clone(p, p_p,l);
00398     clone(y, p_y,l);
00399     clone(alpha,p_alpha,l);
00400     this->Cp = p_Cp;
00401     this->Cn = p_Cn;
00402     this->eps = p_eps;
00403     unshrink = false;
00404 
00405     // initialize alpha_status
00406     {
00407         alpha_status = SG_MALLOC(char, l);
00408         for(int32_t i=0;i<l;i++)
00409             update_alpha_status(i);
00410     }
00411 
00412     // initialize active set (for shrinking)
00413     {
00414         active_set = SG_MALLOC(int32_t, l);
00415         for(int32_t i=0;i<l;i++)
00416             active_set[i] = i;
00417         active_size = l;
00418     }
00419 
00420     // initialize gradient
00421     CSignal::clear_cancel();
00422     CTime start_time;
00423     {
00424         G = SG_MALLOC(float64_t, l);
00425         G_bar = SG_MALLOC(float64_t, l);
00426         int32_t i;
00427         for(i=0;i<l;i++)
00428         {
00429             G[i] = p_p[i];
00430             G_bar[i] = 0;
00431         }
00432         SG_SINFO("Computing gradient for initial set of non-zero alphas\n");
00433         //CMath::display_vector(alpha, l, "alphas");
00434         for(i=0;i<l && !CSignal::cancel_computations(); i++)
00435         {
00436             if(!is_lower_bound(i))
00437             {
00438                 const Qfloat *Q_i = Q->get_Q(i,l);
00439                 float64_t alpha_i = alpha[i];
00440                 int32_t j;
00441                 for(j=0;j<l;j++)
00442                     G[j] += alpha_i*Q_i[j];
00443                 if(is_upper_bound(i))
00444                     for(j=0;j<l;j++)
00445                         G_bar[j] += get_C(i) * Q_i[j];
00446             }
00447             SG_SPROGRESS(i, 0, l);
00448         }
00449         SG_SDONE();
00450     }
00451 
00452     // optimization step
00453 
00454     int32_t iter = 0;
00455     int32_t counter = CMath::min(l,1000)+1;
00456 
00457     while (!CSignal::cancel_computations())
00458     {
00459         if (Q->max_train_time > 0 && start_time.cur_time_diff() > Q->max_train_time)
00460           break;
00461 
00462         // show progress and do shrinking
00463 
00464         if(--counter == 0)
00465         {
00466             counter = CMath::min(l,1000);
00467             if(shrinking) do_shrinking();
00468             //SG_SINFO(".");
00469         }
00470 
00471         int32_t i,j;
00472         float64_t gap;
00473         if(select_working_set(i,j, gap)!=0)
00474         {
00475             // reconstruct the whole gradient
00476             reconstruct_gradient();
00477             // reset active set size and check
00478             active_size = l;
00479             //SG_SINFO("*");
00480             if(select_working_set(i,j, gap)!=0)
00481                 break;
00482             else
00483                 counter = 1;    // do shrinking next iteration
00484         }
00485 
00486         SG_SABS_PROGRESS(gap, -CMath::log10(gap), -CMath::log10(1), -CMath::log10(eps), 6);
00487 
00488         ++iter;
00489 
00490         // update alpha[i] and alpha[j], handle bounds carefully
00491 
00492         const Qfloat *Q_i = Q->get_Q(i,active_size);
00493         const Qfloat *Q_j = Q->get_Q(j,active_size);
00494 
00495         float64_t C_i = get_C(i);
00496         float64_t C_j = get_C(j);
00497 
00498         float64_t old_alpha_i = alpha[i];
00499         float64_t old_alpha_j = alpha[j];
00500 
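        // Without a bias term the equality constraint y^T \alpha = \delta is
        // absent, so alpha_i and alpha_j can be optimized jointly: solve the
        // unconstrained 2x2 quadratic subproblem and clip the result back into
        // the box [0,C_i] x [0,C_j]. With a bias, the usual SMO update along
        // the constraint direction is used instead (else-branch).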
00501         if (!use_bias)
00502         {
00503             double pi=G[i]-Q_i[i]*alpha[i]-Q_i[j]*alpha[j];
00504             double pj=G[j]-Q_i[j]*alpha[i]-Q_j[j]*alpha[j];
00505             double det=Q_i[i]*Q_j[j]-Q_i[j]*Q_i[j];
00506             double alpha_i=-(Q_j[j]*pi-Q_i[j]*pj)/det;
00507             alpha_i=CMath::min(C_i,CMath::max(0.0,alpha_i));
00508             double alpha_j=-(-Q_i[j]*pi+Q_i[i]*pj)/det;
00509             alpha_j=CMath::min(C_j,CMath::max(0.0,alpha_j));
00510 
00511             if (alpha_i==0 || alpha_i == C_i)
00512                 alpha_j=CMath::min(C_j,CMath::max(0.0,-(pj+Q_i[j]*alpha_i)/Q_j[j]));
00513             if (alpha_j==0 || alpha_j == C_j)
00514                 alpha_i=CMath::min(C_i,CMath::max(0.0,-(pi+Q_i[j]*alpha_j)/Q_i[i]));
00515 
00516             alpha[i]=alpha_i; alpha[j]=alpha_j;
00517         }
00518         else
00519         {
00520             if(y[i]!=y[j])
00521             {
00522                 float64_t quad_coef = Q_i[i]+Q_j[j]+2*Q_i[j];
00523                 if (quad_coef <= 0)
00524                     quad_coef = TAU;
00525                 float64_t delta = (-G[i]-G[j])/quad_coef;
00526                 float64_t diff = alpha[i] - alpha[j];
00527                 alpha[i] += delta;
00528                 alpha[j] += delta;
00529 
00530                 if(diff > 0)
00531                 {
00532                     if(alpha[j] < 0)
00533                     {
00534                         alpha[j] = 0;
00535                         alpha[i] = diff;
00536                     }
00537                 }
00538                 else
00539                 {
00540                     if(alpha[i] < 0)
00541                     {
00542                         alpha[i] = 0;
00543                         alpha[j] = -diff;
00544                     }
00545                 }
00546                 if(diff > C_i - C_j)
00547                 {
00548                     if(alpha[i] > C_i)
00549                     {
00550                         alpha[i] = C_i;
00551                         alpha[j] = C_i - diff;
00552                     }
00553                 }
00554                 else
00555                 {
00556                     if(alpha[j] > C_j)
00557                     {
00558                         alpha[j] = C_j;
00559                         alpha[i] = C_j + diff;
00560                     }
00561                 }
00562             }
00563             else
00564             {
00565                 float64_t quad_coef = Q_i[i]+Q_j[j]-2*Q_i[j];
00566                 if (quad_coef <= 0)
00567                     quad_coef = TAU;
00568                 float64_t delta = (G[i]-G[j])/quad_coef;
00569                 float64_t sum = alpha[i] + alpha[j];
00570                 alpha[i] -= delta;
00571                 alpha[j] += delta;
00572 
00573                 if(sum > C_i)
00574                 {
00575                     if(alpha[i] > C_i)
00576                     {
00577                         alpha[i] = C_i;
00578                         alpha[j] = sum - C_i;
00579                     }
00580                 }
00581                 else
00582                 {
00583                     if(alpha[j] < 0)
00584                     {
00585                         alpha[j] = 0;
00586                         alpha[i] = sum;
00587                     }
00588                 }
00589                 if(sum > C_j)
00590                 {
00591                     if(alpha[j] > C_j)
00592                     {
00593                         alpha[j] = C_j;
00594                         alpha[i] = sum - C_j;
00595                     }
00596                 }
00597                 else
00598                 {
00599                     if(alpha[i] < 0)
00600                     {
00601                         alpha[i] = 0;
00602                         alpha[j] = sum;
00603                     }
00604                 }
00605             }
00606         }
00607 
00608         // update G
00609 
00610         float64_t delta_alpha_i = alpha[i] - old_alpha_i;
00611         float64_t delta_alpha_j = alpha[j] - old_alpha_j;
00612 
00613         for(int32_t k=0;k<active_size;k++)
00614         {
00615             G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00616         }
00617 
00618         // update alpha_status and G_bar
00619 
00620         {
00621             bool ui = is_upper_bound(i);
00622             bool uj = is_upper_bound(j);
00623             update_alpha_status(i);
00624             update_alpha_status(j);
00625             int32_t k;
00626             if(ui != is_upper_bound(i))
00627             {
00628                 Q_i = Q->get_Q(i,l);
00629                 if(ui)
00630                     for(k=0;k<l;k++)
00631                         G_bar[k] -= C_i * Q_i[k];
00632                 else
00633                     for(k=0;k<l;k++)
00634                         G_bar[k] += C_i * Q_i[k];
00635             }
00636 
00637             if(uj != is_upper_bound(j))
00638             {
00639                 Q_j = Q->get_Q(j,l);
00640                 if(uj)
00641                     for(k=0;k<l;k++)
00642                         G_bar[k] -= C_j * Q_j[k];
00643                 else
00644                     for(k=0;k<l;k++)
00645                         G_bar[k] += C_j * Q_j[k];
00646             }
00647         }
00648 
00649 #ifdef MCSVM_DEBUG
00650         // calculate objective value
00651         {
00652             float64_t v = 0;
00653             for(i=0;i<l;i++)
00654                 v += alpha[i] * (G[i] + p[i]);
00655 
00656             p_si->obj = v/2;
00657 
00658             float64_t primal=0;
00659             //float64_t gap=100000;
00660             SG_SPRINT("dual obj=%f primal obj=%f gap=%f\n", v/2, primal, gap);
00661         }
00662 #endif
00663     }
00664 
00665     // calculate rho
00666 
00667     if (!use_bias)
00668         p_si->rho = 0;
00669     else
00670         p_si->rho = calculate_rho();
00671 
00672     // calculate objective value
00673     {
00674         float64_t v = 0;
00675         int32_t i;
00676         for(i=0;i<l;i++)
00677             v += alpha[i] * (G[i] + p[i]);
00678 
00679         p_si->obj = v/2;
00680     }
00681 
00682     // put back the solution
00683     {
00684         for(int32_t i=0;i<l;i++)
00685             p_alpha[active_set[i]] = alpha[i];
00686     }
00687 
00688     p_si->upper_bound_p = Cp;
00689     p_si->upper_bound_n = Cn;
00690 
00691     SG_SINFO("\noptimization finished, #iter = %d\n",iter);
00692 
00693     SG_FREE(p);
00694     SG_FREE(y);
00695     SG_FREE(alpha);
00696     SG_FREE(alpha_status);
00697     SG_FREE(active_set);
00698     SG_FREE(G);
00699     SG_FREE(G_bar);
00700 }
00701 
00702 // return 1 if already optimal, return 0 otherwise
00703 int32_t Solver::select_working_set(
00704     int32_t &out_i, int32_t &out_j, float64_t &gap)
00705 {
00706     // return i,j such that
00707     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00708     // j: gives the maximum decrease of the obj value
00709     //    (if quadratic coefficient <= 0, replace it with tau)
00710     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
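    //
    // a candidate j is scored by obj_diff = -(grad_diff)^2/quad_coef, which is
    // proportional to the (negated) decrease of the dual objective obtained
    // from optimizing the pair (i,j); quad_coef = Q_ii + Q_jj - 2*y_i*y_j*Q_ij
    // is replaced by TAU when it is not positive. This is the second order
    // working set selection of Fan, Chen and Lin (JMLR 2005).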
00711 
00712     float64_t Gmax = -INF;
00713     float64_t Gmax2 = -INF;
00714     int32_t Gmax_idx = -1;
00715     int32_t Gmin_idx = -1;
00716     float64_t obj_diff_min = INF;
00717 
00718     for(int32_t t=0;t<active_size;t++)
00719         if(y[t]==+1)
00720         {
00721             if(!is_upper_bound(t))
00722                 if(-G[t] >= Gmax)
00723                 {
00724                     Gmax = -G[t];
00725                     Gmax_idx = t;
00726                 }
00727         }
00728         else
00729         {
00730             if(!is_lower_bound(t))
00731                 if(G[t] >= Gmax)
00732                 {
00733                     Gmax = G[t];
00734                     Gmax_idx = t;
00735                 }
00736         }
00737 
00738     int32_t i = Gmax_idx;
00739     const Qfloat *Q_i = NULL;
00740     if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00741         Q_i = Q->get_Q(i,active_size);
00742 
00743     for(int32_t j=0;j<active_size;j++)
00744     {
00745         if(y[j]==+1)
00746         {
00747             if (!is_lower_bound(j))
00748             {
00749                 float64_t grad_diff=Gmax+G[j];
00750                 if (G[j] >= Gmax2)
00751                     Gmax2 = G[j];
00752                 if (grad_diff > 0)
00753                 {
00754                     float64_t obj_diff;
00755                     float64_t quad_coef=Q_i[i]+QD[j]-2.0*y[i]*Q_i[j];
00756                     if (quad_coef > 0)
00757                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00758                     else
00759                         obj_diff = -(grad_diff*grad_diff)/TAU;
00760 
00761                     if (obj_diff <= obj_diff_min)
00762                     {
00763                         Gmin_idx=j;
00764                         obj_diff_min = obj_diff;
00765                     }
00766                 }
00767             }
00768         }
00769         else
00770         {
00771             if (!is_upper_bound(j))
00772             {
00773                 float64_t grad_diff= Gmax-G[j];
00774                 if (-G[j] >= Gmax2)
00775                     Gmax2 = -G[j];
00776                 if (grad_diff > 0)
00777                 {
00778                     float64_t obj_diff;
00779                     float64_t quad_coef=Q_i[i]+QD[j]+2.0*y[i]*Q_i[j];
00780                     if (quad_coef > 0)
00781                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
00782                     else
00783                         obj_diff = -(grad_diff*grad_diff)/TAU;
00784 
00785                     if (obj_diff <= obj_diff_min)
00786                     {
00787                         Gmin_idx=j;
00788                         obj_diff_min = obj_diff;
00789                     }
00790                 }
00791             }
00792         }
00793     }
00794 
00795     gap=Gmax+Gmax2;
00796     if(gap < eps)
00797         return 1;
00798 
00799     out_i = Gmax_idx;
00800     out_j = Gmin_idx;
00801     return 0;
00802 }
00803 
00804 bool Solver::be_shrunk(int32_t i, float64_t Gmax1, float64_t Gmax2)
00805 {
00806     if(is_upper_bound(i))
00807     {
00808         if(y[i]==+1)
00809             return(-G[i] > Gmax1);
00810         else
00811             return(-G[i] > Gmax2);
00812     }
00813     else if(is_lower_bound(i))
00814     {
00815         if(y[i]==+1)
00816             return(G[i] > Gmax2);
00817         else
00818             return(G[i] > Gmax1);
00819     }
00820     else
00821         return(false);
00822 }
00823 
00824 
00825 void Solver::do_shrinking()
00826 {
00827     int32_t i;
00828     float64_t Gmax1 = -INF;     // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00829     float64_t Gmax2 = -INF;     // max { y_i * grad(f)_i | i in I_low(\alpha) }
00830 
00831     // find maximal violating pair first
00832     for(i=0;i<active_size;i++)
00833     {
00834         if(y[i]==+1)
00835         {
00836             if(!is_upper_bound(i))
00837             {
00838                 if(-G[i] >= Gmax1)
00839                     Gmax1 = -G[i];
00840             }
00841             if(!is_lower_bound(i))
00842             {
00843                 if(G[i] >= Gmax2)
00844                     Gmax2 = G[i];
00845             }
00846         }
00847         else
00848         {
00849             if(!is_upper_bound(i))
00850             {
00851                 if(-G[i] >= Gmax2)
00852                     Gmax2 = -G[i];
00853             }
00854             if(!is_lower_bound(i))
00855             {
00856                 if(G[i] >= Gmax1)
00857                     Gmax1 = G[i];
00858             }
00859         }
00860     }
00861 
00862     if(unshrink == false && Gmax1 + Gmax2 <= eps*10)
00863     {
00864         unshrink = true;
00865         reconstruct_gradient();
00866         active_size = l;
00867     }
00868 
00869     for(i=0;i<active_size;i++)
00870         if (be_shrunk(i, Gmax1, Gmax2))
00871         {
00872             active_size--;
00873             while (active_size > i)
00874             {
00875                 if (!be_shrunk(active_size, Gmax1, Gmax2))
00876                 {
00877                     swap_index(i,active_size);
00878                     break;
00879                 }
00880                 active_size--;
00881             }
00882         }
00883 }
00884 
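// rho is estimated from the KKT conditions: for every free variable y_i*G_i
// equals rho at optimality, so their average is used; if no variable is free,
// the midpoint of the feasible interval [lb,ub] is taken. Callers use -rho as
// the bias term of the resulting decision function.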
00885 float64_t Solver::calculate_rho()
00886 {
00887     float64_t r;
00888     int32_t nr_free = 0;
00889     float64_t ub = INF, lb = -INF, sum_free = 0;
00890     for(int32_t i=0;i<active_size;i++)
00891     {
00892         float64_t yG = y[i]*G[i];
00893 
00894         if(is_upper_bound(i))
00895         {
00896             if(y[i]==-1)
00897                 ub = CMath::min(ub,yG);
00898             else
00899                 lb = CMath::max(lb,yG);
00900         }
00901         else if(is_lower_bound(i))
00902         {
00903             if(y[i]==+1)
00904                 ub = CMath::min(ub,yG);
00905             else
00906                 lb = CMath::max(lb,yG);
00907         }
00908         else
00909         {
00910             ++nr_free;
00911             sum_free += yG;
00912         }
00913     }
00914 
00915     if(nr_free>0)
00916         r = sum_free/nr_free;
00917     else
00918         r = (ub+lb)/2;
00919 
00920     return r;
00921 }
00922 
00923 
00924 //
00925 // Solve with individually weighted examples
00926 //
00927 class WeightedSolver : public Solver
00928 {
00929 
00930 public:
00931 
00932     WeightedSolver(float64_t* cost_vec)
00933     {
00934 
00935         this->Cs = cost_vec;
00936 
00937     }
00938 
00939     virtual float64_t get_C(int32_t i)
00940     {
00941 
00942         return Cs[i];
00943     }
00944 
00945 protected:
00946 
00947   float64_t* Cs;
00948 
00949 };
00950 
00951 
00952 //
00953 // Solver for nu-svm classification and regression
00954 //
00955 // additional constraint: e^T \alpha = constant
00956 //
00957 class Solver_NU : public Solver
00958 {
00959 public:
00960     Solver_NU() {}
00961     void Solve(
00962         int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
00963         const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
00964         float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
00965     {
00966         this->si = p_si;
00967         Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,
00968                 shrinking,use_bias);
00969     }
00970 private:
00971     SolutionInfo *si;
00972     int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
00973     float64_t calculate_rho();
00974     bool be_shrunk(
00975         int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
00976         float64_t Gmax4);
00977     void do_shrinking();
00978 };
00979 
00980 // return 1 if already optimal, return 0 otherwise
00981 int32_t Solver_NU::select_working_set(
00982     int32_t &out_i, int32_t &out_j, float64_t &gap)
00983 {
00984     // return i,j such that y_i = y_j and
00985     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00986     // j: gives the maximum decrease of the obj value
00987     //    (if quadratic coefficient <= 0, replace it with tau)
00988     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00989 
00990     float64_t Gmaxp = -INF;
00991     float64_t Gmaxp2 = -INF;
00992     int32_t Gmaxp_idx = -1;
00993 
00994     float64_t Gmaxn = -INF;
00995     float64_t Gmaxn2 = -INF;
00996     int32_t Gmaxn_idx = -1;
00997 
00998     int32_t Gmin_idx = -1;
00999     float64_t obj_diff_min = INF;
01000 
01001     for(int32_t t=0;t<active_size;t++)
01002         if(y[t]==+1)
01003         {
01004             if(!is_upper_bound(t))
01005                 if(-G[t] >= Gmaxp)
01006                 {
01007                     Gmaxp = -G[t];
01008                     Gmaxp_idx = t;
01009                 }
01010         }
01011         else
01012         {
01013             if(!is_lower_bound(t))
01014                 if(G[t] >= Gmaxn)
01015                 {
01016                     Gmaxn = G[t];
01017                     Gmaxn_idx = t;
01018                 }
01019         }
01020 
01021     int32_t ip = Gmaxp_idx;
01022     int32_t in = Gmaxn_idx;
01023     const Qfloat *Q_ip = NULL;
01024     const Qfloat *Q_in = NULL;
01025     if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01026         Q_ip = Q->get_Q(ip,active_size);
01027     if(in != -1)
01028         Q_in = Q->get_Q(in,active_size);
01029 
01030     for(int32_t j=0;j<active_size;j++)
01031     {
01032         if(y[j]==+1)
01033         {
01034             if (!is_lower_bound(j))
01035             {
01036                 float64_t grad_diff=Gmaxp+G[j];
01037                 if (G[j] >= Gmaxp2)
01038                     Gmaxp2 = G[j];
01039                 if (grad_diff > 0)
01040                 {
01041                     float64_t obj_diff;
01042                     float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01043                     if (quad_coef > 0)
01044                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
01045                     else
01046                         obj_diff = -(grad_diff*grad_diff)/TAU;
01047 
01048                     if (obj_diff <= obj_diff_min)
01049                     {
01050                         Gmin_idx=j;
01051                         obj_diff_min = obj_diff;
01052                     }
01053                 }
01054             }
01055         }
01056         else
01057         {
01058             if (!is_upper_bound(j))
01059             {
01060                 float64_t grad_diff=Gmaxn-G[j];
01061                 if (-G[j] >= Gmaxn2)
01062                     Gmaxn2 = -G[j];
01063                 if (grad_diff > 0)
01064                 {
01065                     float64_t obj_diff;
01066                     float64_t quad_coef = Q_in[in]+QD[j]-2*Q_in[j];
01067                     if (quad_coef > 0)
01068                         obj_diff = -(grad_diff*grad_diff)/quad_coef;
01069                     else
01070                         obj_diff = -(grad_diff*grad_diff)/TAU;
01071 
01072                     if (obj_diff <= obj_diff_min)
01073                     {
01074                         Gmin_idx=j;
01075                         obj_diff_min = obj_diff;
01076                     }
01077                 }
01078             }
01079         }
01080     }
01081 
01082     gap=CMath::max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2);
01083     if(gap < eps)
01084         return 1;
01085 
01086     if (y[Gmin_idx] == +1)
01087         out_i = Gmaxp_idx;
01088     else
01089         out_i = Gmaxn_idx;
01090     out_j = Gmin_idx;
01091 
01092     return 0;
01093 }
01094 
01095 bool Solver_NU::be_shrunk(
01096     int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
01097     float64_t Gmax4)
01098 {
01099     if(is_upper_bound(i))
01100     {
01101         if(y[i]==+1)
01102             return(-G[i] > Gmax1);
01103         else
01104             return(-G[i] > Gmax4);
01105     }
01106     else if(is_lower_bound(i))
01107     {
01108         if(y[i]==+1)
01109             return(G[i] > Gmax2);
01110         else
01111             return(G[i] > Gmax3);
01112     }
01113     else
01114         return(false);
01115 }
01116 
01117 void Solver_NU::do_shrinking()
01118 {
01119     float64_t Gmax1 = -INF; // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01120     float64_t Gmax2 = -INF; // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01121     float64_t Gmax3 = -INF; // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01122     float64_t Gmax4 = -INF; // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01123 
01124     // find maximal violating pair first
01125     int32_t i;
01126     for(i=0;i<active_size;i++)
01127     {
01128         if(!is_upper_bound(i))
01129         {
01130             if(y[i]==+1)
01131             {
01132                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01133             }
01134             else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01135         }
01136         if(!is_lower_bound(i))
01137         {
01138             if(y[i]==+1)
01139             {
01140                 if(G[i] > Gmax2) Gmax2 = G[i];
01141             }
01142             else    if(G[i] > Gmax3) Gmax3 = G[i];
01143         }
01144     }
01145 
01146     if(unshrink == false && CMath::max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10)
01147     {
01148         unshrink = true;
01149         reconstruct_gradient();
01150         active_size = l;
01151     }
01152 
01153     for(i=0;i<active_size;i++)
01154         if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01155         {
01156             active_size--;
01157             while (active_size > i)
01158             {
01159                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01160                 {
01161                     swap_index(i,active_size);
01162                     break;
01163                 }
01164                 active_size--;
01165             }
01166         }
01167 }
01168 
01169 float64_t Solver_NU::calculate_rho()
01170 {
01171     int32_t nr_free1 = 0,nr_free2 = 0;
01172     float64_t ub1 = INF, ub2 = INF;
01173     float64_t lb1 = -INF, lb2 = -INF;
01174     float64_t sum_free1 = 0, sum_free2 = 0;
01175 
01176     for(int32_t i=0; i<active_size; i++)
01177     {
01178         if(y[i]==+1)
01179         {
01180             if(is_upper_bound(i))
01181                 lb1 = CMath::max(lb1,G[i]);
01182             else if(is_lower_bound(i))
01183                 ub1 = CMath::min(ub1,G[i]);
01184             else
01185             {
01186                 ++nr_free1;
01187                 sum_free1 += G[i];
01188             }
01189         }
01190         else
01191         {
01192             if(is_upper_bound(i))
01193                 lb2 = CMath::max(lb2,G[i]);
01194             else if(is_lower_bound(i))
01195                 ub2 = CMath::min(ub2,G[i]);
01196             else
01197             {
01198                 ++nr_free2;
01199                 sum_free2 += G[i];
01200             }
01201         }
01202     }
01203 
01204     float64_t r1,r2;
01205     if(nr_free1 > 0)
01206         r1 = sum_free1/nr_free1;
01207     else
01208         r1 = (ub1+lb1)/2;
01209 
01210     if(nr_free2 > 0)
01211         r2 = sum_free2/nr_free2;
01212     else
01213         r2 = (ub2+lb2)/2;
01214 
01215     si->r = (r1+r2)/2;
01216     return (r1-r2)/2;
01217 }
01218 
01219 class SVC_QMC: public LibSVMKernel
01220 {
01221 public:
01222     SVC_QMC(const svm_problem& prob, const svm_parameter& param, const schar *y_, int32_t n_class, float64_t fac)
01223     :LibSVMKernel(prob.l, prob.x, param)
01224     {
01225         nr_class=n_class;
01226         factor=fac;
01227         clone(y,y_,prob.l);
01228         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01229         QD = SG_MALLOC(Qfloat, prob.l);
01230         for(int32_t i=0;i<prob.l;i++)
01231         {
01232             QD[i]= factor*(nr_class-1)*kernel_function(i,i);
01233         }
01234     }
01235 
01236     Qfloat *get_Q(int32_t i, int32_t len) const
01237     {
01238         Qfloat *data;
01239         int32_t start;
01240         if((start = cache->get_data(i,&data,len)) < len)
01241         {
01242             for(int32_t j=start;j<len;j++)
01243             {
01244                 if (y[i]==y[j])
01245                     data[j] = factor*(nr_class-1)*kernel_function(i,j);
01246                 else
01247                     data[j] = -factor*kernel_function(i,j);
01248             }
01249         }
01250         return data;
01251     }
01252 
01253     inline Qfloat get_orig_Qij(Qfloat Q, int32_t i, int32_t j)
01254     {
01255         if (y[i]==y[j])
01256             return Q/(nr_class-1);
01257         else
01258             return -Q;
01259     }
01260 
01261     Qfloat *get_QD() const
01262     {
01263         return QD;
01264     }
01265 
01266     void swap_index(int32_t i, int32_t j) const
01267     {
01268         cache->swap_index(i,j);
01269         LibSVMKernel::swap_index(i,j);
01270         CMath::swap(y[i],y[j]);
01271         CMath::swap(QD[i],QD[j]);
01272     }
01273 
01274     ~SVC_QMC()
01275     {
01276         SG_FREE(y);
01277         delete cache;
01278         SG_FREE(QD);
01279     }
01280 private:
01281     float64_t factor;
01282     float64_t nr_class;
01283     schar *y;
01284     Cache *cache;
01285     Qfloat *QD;
01286 };
01287 
01288 //
01289 // Solver for nu-svm multiclass classification
01290 //
01291 // additional constraint: the per-class sums of \alpha stay constant (working pairs satisfy y_i = y_j)
01292 //
01293 class Solver_NUMC : public Solver
01294 {
01295 public:
01296     Solver_NUMC(int32_t n_class, float64_t svm_nu)
01297     {
01298         nr_class=n_class;
01299         nu=svm_nu;
01300     }
01301 
01302     void Solve(
01303         int32_t p_l, const QMatrix& p_Q, const float64_t *p_p,
01304         const schar *p_y, float64_t* p_alpha, float64_t p_Cp, float64_t p_Cn,
01305         float64_t p_eps, SolutionInfo* p_si, int32_t shrinking, bool use_bias)
01306     {
01307         this->si = p_si;
01308         Solver::Solve(p_l,p_Q,p_p,p_y,p_alpha,p_Cp,p_Cn,p_eps,p_si,shrinking, use_bias);
01309     }
01310     float64_t compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases,float64_t* normwcw);
01311 
01312 private:
01313     SolutionInfo *si;
01314     int32_t select_working_set(int32_t &i, int32_t &j, float64_t &gap);
01315     float64_t calculate_rho();
01316     bool be_shrunk(
01317         int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
01318         float64_t Gmax4);
01319     void do_shrinking();
01320 
01321 private:
01322     int32_t nr_class;
01323     float64_t  nu;
01324 };
01325 
01326 float64_t Solver_NUMC::compute_primal(const schar* p_y, float64_t* p_alpha, float64_t* biases, float64_t* normwcw)
01327 {
01328     clone(y, p_y,l);
01329     clone(alpha,p_alpha,l);
01330 
01331     alpha_status = SG_MALLOC(char, l);
01332     for(int32_t i=0;i<l;i++)
01333         update_alpha_status(i);
01334 
01335     float64_t* class_count = SG_MALLOC(float64_t, nr_class);
01336     float64_t* outputs = SG_MALLOC(float64_t, l);
01337 
01338     for (int32_t i=0; i<nr_class; i++)
01339     {
01340         class_count[i]=0;
01341         biases[i+1]=0;
01342     }
01343 
01344     for (int32_t i=0; i<active_size; i++)
01345     {
01346         update_alpha_status(i);
01347         if(!is_upper_bound(i) && !is_lower_bound(i))
01348             class_count[(int32_t) y[i]]++;
01349     }
01350 
01351     //CMath::display_vector(class_count, nr_class, "class_count");
01352 
01353     float64_t mu=((float64_t) nr_class)/(nu*l);
01354     //SG_SPRINT("nr_class=%d, l=%d, active_size=%d, nu=%f, mu=%f\n", nr_class, l, active_size, nu, mu);
01355 
01356     float64_t rho=0;
01357     float64_t quad=0;
01358     float64_t* zero_counts  = SG_MALLOC(float64_t, nr_class);
01359     float64_t normwc_const = 0;
01360 
01361     for (int32_t i=0; i<nr_class; i++)
01362     {
01363         zero_counts[i]=-INF;
01364         normwcw[i]=0;
01365     }
01366 
01367     for (int32_t i=0; i<active_size; i++)
01368     {
01369         float64_t sum_free=0;
01370         float64_t sum_atbound=0;
01371         float64_t sum_zero_count=0;
01372 
01373         Qfloat* Q_i = Q->get_Q(i,active_size);
01374         outputs[i]=0;
01375 
01376         for (int j=0; j<active_size; j++)
01377         {
01378             quad+= alpha[i]*alpha[j]*Q_i[j];
01379             float64_t tmp= alpha[j]*Q_i[j]/mu;
01380 
01381             if(!is_upper_bound(i) && !is_lower_bound(i))
01382                 sum_free+=tmp;
01383             else
01384                 sum_atbound+=tmp;
01385 
01386             if (class_count[(int32_t) y[i]] == 0 && y[j]==y[i])
01387                 sum_zero_count+= tmp;
01388 
01389             SVC_QMC* QMC=(SVC_QMC*) Q;
01390             float64_t norm_tmp=alpha[i]*alpha[j]*QMC->get_orig_Qij(Q_i[j], i, j);
01391             if (y[i]==y[j])
01392                 normwcw[(int32_t) y[i]]+=norm_tmp;
01393 
01394             normwcw[(int32_t) y[i]]-=2.0/nr_class*norm_tmp;
01395             normwc_const+=norm_tmp;
01396         }
01397 
01398         if (class_count[(int32_t) y[i]] == 0)
01399         {
01400             if (zero_counts[(int32_t) y[i]]<sum_zero_count)
01401                 zero_counts[(int32_t) y[i]]=sum_zero_count;
01402         }
01403 
01404         biases[(int32_t) y[i]+1]-=sum_free;
01405         if (class_count[(int32_t) y[i]] != 0.0)
01406             rho+=sum_free/class_count[(int32_t) y[i]];
01407         outputs[i]+=sum_free+sum_atbound;
01408     }
01409 
01410     for (int32_t i=0; i<nr_class; i++)
01411     {
01412         if (class_count[i] == 0.0)
01413             rho+=zero_counts[i];
01414 
01415         normwcw[i]+=normwc_const/CMath::sq(nr_class);
01416         normwcw[i]=CMath::sqrt(normwcw[i]);
01417     }
01418 
01419     CMath::display_vector(normwcw, nr_class, "normwcw");
01420 
01421     rho/=nr_class;
01422 
01423     SG_SPRINT("rho=%f\n", rho);
01424 
01425     float64_t sumb=0;
01426     for (int32_t i=0; i<nr_class; i++)
01427     {
01428         if (class_count[i] != 0.0)
01429             biases[i+1]=biases[i+1]/class_count[i]+rho;
01430         else
01431             biases[i+1]+=rho-zero_counts[i];
01432 
01433         SG_SPRINT("biases=%f\n", biases[i+1]);
01434 
01435         sumb+=biases[i+1];
01436     }
01437     SG_SPRINT("sumb=%f\n", sumb);
01438 
01439     SG_FREE(zero_counts);
01440 
01441     for (int32_t i=0; i<l; i++)
01442         outputs[i]+=biases[(int32_t) y[i]+1];
01443 
01444     biases[0]=rho;
01445 
01446     //CMath::display_vector(outputs, l, "outputs");
01447 
01448 
01449     float64_t xi=0;
01450     for (int32_t i=0; i<active_size; i++)
01451     {
01452         if (is_lower_bound(i))
01453             continue;
01454         xi+=rho-outputs[i];
01455     }
01456 
01457     //SG_SPRINT("xi=%f\n", xi);
01458 
01459     //SG_SPRINT("quad=%f Cp=%f xi*mu=%f\n", quad, nr_class*rho, xi*mu);
01460 
01461     float64_t primal=0.5*quad- nr_class*rho+xi*mu;
01462 
01463     //SG_SPRINT("primal=%10.10f\n", primal);
01464 
01465     SG_FREE(y);
01466     SG_FREE(alpha);
01467     SG_FREE(alpha_status);
01468 
01469     return primal;
01470 }
01471 
01472 
01473 // return 1 if already optimal, return 0 otherwise
01474 int32_t Solver_NUMC::select_working_set(
01475     int32_t &out_i, int32_t &out_j, float64_t &gap)
01476 {
01477     // return i,j such that y_i = y_j and
01478     // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01479     // j: gives the maximum decrease of the obj value
01480     //    (if quadratic coefficient <= 0, replace it with tau)
01481     //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01482 
01483     int32_t retval=0;
01484     float64_t best_gap=0;
01485     int32_t best_out_i=-1;
01486     int32_t best_out_j=-1;
01487 
01488     float64_t* Gmaxp = SG_MALLOC(float64_t, nr_class);
01489     float64_t* Gmaxp2 = SG_MALLOC(float64_t, nr_class);
01490     int32_t* Gmaxp_idx = SG_MALLOC(int32_t, nr_class);
01491 
01492     int32_t* Gmin_idx = SG_MALLOC(int32_t, nr_class);
01493     float64_t* obj_diff_min = SG_MALLOC(float64_t, nr_class);
01494 
01495     for (int32_t i=0; i<nr_class; i++)
01496     {
01497         Gmaxp[i]=-INF;
01498         Gmaxp2[i]=-INF;
01499         Gmaxp_idx[i]=-1;
01500         Gmin_idx[i]=-1;
01501         obj_diff_min[i]=INF;
01502     }
01503 
01504     for(int32_t t=0;t<active_size;t++)
01505     {
01506         int32_t cidx=y[t];
01507         if(!is_upper_bound(t))
01508         {
01509             if(-G[t] >= Gmaxp[cidx])
01510             {
01511                 Gmaxp[cidx] = -G[t];
01512                 Gmaxp_idx[cidx] = t;
01513             }
01514         }
01515     }
01516 
01517     for(int32_t j=0;j<active_size;j++)
01518     {
01519         int32_t cidx=y[j];
01520         int32_t ip = Gmaxp_idx[cidx];
01521         const Qfloat *Q_ip = NULL;
01522         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01523             Q_ip = Q->get_Q(ip,active_size);
01524 
01525         if (!is_lower_bound(j))
01526         {
01527             float64_t grad_diff=Gmaxp[cidx]+G[j];
01528             if (G[j] >= Gmaxp2[cidx])
01529                 Gmaxp2[cidx] = G[j];
01530             if (grad_diff > 0)
01531             {
01532                 float64_t obj_diff;
01533                 float64_t quad_coef = Q_ip[ip]+QD[j]-2*Q_ip[j];
01534                 if (quad_coef > 0)
01535                     obj_diff = -(grad_diff*grad_diff)/quad_coef;
01536                 else
01537                     obj_diff = -(grad_diff*grad_diff)/TAU;
01538 
01539                 if (obj_diff <= obj_diff_min[cidx])
01540                 {
01541                     Gmin_idx[cidx]=j;
01542                     obj_diff_min[cidx] = obj_diff;
01543                 }
01544             }
01545         }
01546 
01547         gap=Gmaxp[cidx]+Gmaxp2[cidx];
01548         if (gap>=best_gap && Gmin_idx[cidx]>=0 &&
01549                 Gmaxp_idx[cidx]>=0 && Gmin_idx[cidx]<active_size)
01550         {
01551             out_i = Gmaxp_idx[cidx];
01552             out_j = Gmin_idx[cidx];
01553 
01554             best_gap=gap;
01555             best_out_i=out_i;
01556             best_out_j=out_j;
01557         }
01558     }
01559 
01560     gap=best_gap;
01561     out_i=best_out_i;
01562     out_j=best_out_j;
01563 
01564     SG_SDEBUG("i=%d j=%d best_gap=%f y_i=%f y_j=%f\n", out_i, out_j, gap, y[out_i], y[out_j]);
01565 
01566 
01567     if(gap < eps)
01568         retval=1;
01569 
01570     SG_FREE(Gmaxp);
01571     SG_FREE(Gmaxp2);
01572     SG_FREE(Gmaxp_idx);
01573     SG_FREE(Gmin_idx);
01574     SG_FREE(obj_diff_min);
01575 
01576     return retval;
01577 }
01578 
01579 bool Solver_NUMC::be_shrunk(
01580     int32_t i, float64_t Gmax1, float64_t Gmax2, float64_t Gmax3,
01581     float64_t Gmax4)
01582 {
01583     return false;
01584 }
01585 
01586 void Solver_NUMC::do_shrinking()
01587 {
01588 }
01589 
01590 float64_t Solver_NUMC::calculate_rho()
01591 {
01592     return 0;
01593 }
01594 
01595 
01596 //
01597 // Q matrices for various formulations
01598 //
01599 class SVC_Q: public LibSVMKernel
01600 {
01601 public:
01602     SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01603     :LibSVMKernel(prob.l, prob.x, param)
01604     {
01605         clone(y,y_,prob.l);
01606         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01607         QD = SG_MALLOC(Qfloat, prob.l);
01608         for(int32_t i=0;i<prob.l;i++)
01609             QD[i]= (Qfloat)kernel_function(i,i);
01610     }
01611 
01612     Qfloat *get_Q(int32_t i, int32_t len) const
01613     {
01614         Qfloat *data;
01615         int32_t start;
01616         if((start = cache->get_data(i,&data,len)) < len)
01617         {
01618             for(int32_t j=start;j<len;j++)
01619                 data[j] = (Qfloat) y[i]*y[j]*kernel_function(i,j);
01620         }
01621         return data;
01622     }
01623 
01624     Qfloat *get_QD() const
01625     {
01626         return QD;
01627     }
01628 
01629     void swap_index(int32_t i, int32_t j) const
01630     {
01631         cache->swap_index(i,j);
01632         LibSVMKernel::swap_index(i,j);
01633         CMath::swap(y[i],y[j]);
01634         CMath::swap(QD[i],QD[j]);
01635     }
01636 
01637     ~SVC_Q()
01638     {
01639         SG_FREE(y);
01640         delete cache;
01641         SG_FREE(QD);
01642     }
01643 private:
01644     schar *y;
01645     Cache *cache;
01646     Qfloat *QD;
01647 };
01648 
01649 
01650 class ONE_CLASS_Q: public LibSVMKernel
01651 {
01652 public:
01653     ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01654     :LibSVMKernel(prob.l, prob.x, param)
01655     {
01656         cache = new Cache(prob.l,(int64_t)(param.cache_size*(1l<<20)));
01657         QD = SG_MALLOC(Qfloat, prob.l);
01658         for(int32_t i=0;i<prob.l;i++)
01659             QD[i]= (Qfloat)kernel_function(i,i);
01660     }
01661 
01662     Qfloat *get_Q(int32_t i, int32_t len) const
01663     {
01664         Qfloat *data;
01665         int32_t start;
01666         if((start = cache->get_data(i,&data,len)) < len)
01667         {
01668             for(int32_t j=start;j<len;j++)
01669                 data[j] = (Qfloat) kernel_function(i,j);
01670         }
01671         return data;
01672     }
01673 
01674     Qfloat *get_QD() const
01675     {
01676         return QD;
01677     }
01678 
01679     void swap_index(int32_t i, int32_t j) const
01680     {
01681         cache->swap_index(i,j);
01682         LibSVMKernel::swap_index(i,j);
01683         CMath::swap(QD[i],QD[j]);
01684     }
01685 
01686     ~ONE_CLASS_Q()
01687     {
01688         delete cache;
01689         SG_FREE(QD);
01690     }
01691 private:
01692     Cache *cache;
01693     Qfloat *QD;
01694 };
01695 
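// Q matrix for SVR: the regression dual has two variables (alpha and alpha*)
// per training example, so the problem is expanded to 2*l variables. index[k]
// maps an expanded variable back to its example, sign[k] is +1 for the first
// l entries and -1 for the second l, and Q_ij = sign_i*sign_j*K(index_i,index_j).
// get_Q() caches the plain kernel column of the underlying example and
// reorders it on the fly into one of two alternating buffers.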
01696 class SVR_Q: public LibSVMKernel
01697 {
01698 public:
01699     SVR_Q(const svm_problem& prob, const svm_parameter& param)
01700     :LibSVMKernel(prob.l, prob.x, param)
01701     {
01702         l = prob.l;
01703         cache = new Cache(l,(int64_t)(param.cache_size*(1l<<20)));
01704         QD = SG_MALLOC(Qfloat, 2*l);
01705         sign = SG_MALLOC(schar, 2*l);
01706         index = SG_MALLOC(int32_t, 2*l);
01707         for(int32_t k=0;k<l;k++)
01708         {
01709             sign[k] = 1;
01710             sign[k+l] = -1;
01711             index[k] = k;
01712             index[k+l] = k;
01713             QD[k]= (Qfloat)kernel_function(k,k);
01714             QD[k+l]=QD[k];
01715         }
01716         buffer[0] = SG_MALLOC(Qfloat, 2*l);
01717         buffer[1] = SG_MALLOC(Qfloat, 2*l);
01718         next_buffer = 0;
01719     }
01720 
01721     void swap_index(int32_t i, int32_t j) const
01722     {
01723         CMath::swap(sign[i],sign[j]);
01724         CMath::swap(index[i],index[j]);
01725         CMath::swap(QD[i],QD[j]);
01726     }
01727 
01728     Qfloat *get_Q(int32_t i, int32_t len) const
01729     {
01730         Qfloat *data;
01731         int32_t real_i = index[i];
01732         if(cache->get_data(real_i,&data,l) < l)
01733         {
01734             for(int32_t j=0;j<l;j++)
01735                 data[j] = (Qfloat)kernel_function(real_i,j);
01736         }
01737 
01738         // reorder and copy
01739         Qfloat *buf = buffer[next_buffer];
01740         next_buffer = 1 - next_buffer;
01741         schar si = sign[i];
01742         for(int32_t j=0;j<len;j++)
01743             buf[j] = si * sign[j] * data[index[j]];
01744         return buf;
01745     }
01746 
01747     Qfloat *get_QD() const
01748     {
01749         return QD;
01750     }
01751 
01752     ~SVR_Q()
01753     {
01754         delete cache;
01755         SG_FREE(sign);
01756         SG_FREE(index);
01757         SG_FREE(buffer[0]);
01758         SG_FREE(buffer[1]);
01759         SG_FREE(QD);
01760     }
01761 
01762 private:
01763     int32_t l;
01764     Cache *cache;
01765     schar *sign;
01766     int32_t *index;
01767     mutable int32_t next_buffer;
01768     Qfloat *buffer[2];
01769     Qfloat *QD;
01770 };
01771 
01772 //
01773 // construct and solve various formulations
01774 //
01775 static void solve_c_svc(
01776     const svm_problem *prob, const svm_parameter* param,
01777     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01778 {
01779     int32_t l = prob->l;
01780     schar *y = SG_MALLOC(schar, l);
01781 
01782     int32_t i;
01783 
01784     for(i=0;i<l;i++)
01785     {
01786         alpha[i] = 0;
01787         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01788     }
01789 
01790     Solver s;
01791     s.Solve(l, SVC_Q(*prob,*param,y), prob->pv, y,
01792         alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
01793 
01794     float64_t sum_alpha=0;
01795     for(i=0;i<l;i++)
01796         sum_alpha += alpha[i];
01797 
01798     if (Cp==Cn)
01799         SG_SINFO("nu = %f\n", sum_alpha/(param->C*prob->l));
01800 
01801     for(i=0;i<l;i++)
01802         alpha[i] *= y[i];
01803 
01804     SG_FREE(y);
01805 }
01806 
01807 
01808 //two weighted datasets
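// Apparently a Shogun-specific variant of solve_c_svc: the per-example cost
// values prob->C are handed to a WeightedSolver, which (judging from the
// constructor argument) lets every training point carry its own C instead of
// the single global parameter.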
01809 void solve_c_svc_weighted(
01810     const svm_problem *prob, const svm_parameter* param,
01811     float64_t *alpha, Solver::SolutionInfo* si, float64_t Cp, float64_t Cn)
01812 {
01813     int l = prob->l;
01814     float64_t *minus_ones = SG_MALLOC(float64_t, l);
01815     schar *y = SG_MALLOC(schar, l);
01816 
01817     int i;
01818 
01819     for(i=0;i<l;i++)
01820     {
01821         alpha[i] = 0;
01822         minus_ones[i] = -1;
01823         if(prob->y[i] > 0) y[i] = +1; else y[i]=-1;
01824     }
01825 
01826     WeightedSolver s = WeightedSolver(prob->C);
01827     s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01828         alpha, Cp, Cn, param->eps, si, param->shrinking, param->use_bias);
01829 
01830     float64_t sum_alpha=0;
01831     for(i=0;i<l;i++)
01832         sum_alpha += alpha[i];
01833 
01834     //if (Cp==Cn)
01835     //  SG_SINFO("nu = %f\n", sum_alpha/(prob->C*prob->l));
01836 
01837     for(i=0;i<l;i++)
01838         alpha[i] *= y[i];
01839 
01840     SG_FREE(minus_ones);
01841     SG_FREE(y);
01842 }
01843 
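// nu-SVC: alpha is initialized so that each class receives a total weight of
// nu*l/2 (clipped to the box [0,1] per example), the QP is solved with a zero
// linear term and unit bounds, and the solution is rescaled by 1/r so that
// the returned alphas correspond to an equivalent C-SVC with C = 1/r.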
01844 static void solve_nu_svc(
01845     const svm_problem *prob, const svm_parameter *param,
01846     float64_t *alpha, Solver::SolutionInfo* si)
01847 {
01848     int32_t i;
01849     int32_t l = prob->l;
01850     float64_t nu = param->nu;
01851 
01852     schar *y = SG_MALLOC(schar, l);
01853 
01854     for(i=0;i<l;i++)
01855         if(prob->y[i]>0)
01856             y[i] = +1;
01857         else
01858             y[i] = -1;
01859 
01860     float64_t sum_pos = nu*l/2;
01861     float64_t sum_neg = nu*l/2;
01862 
01863     for(i=0;i<l;i++)
01864         if(y[i] == +1)
01865         {
01866             alpha[i] = CMath::min(1.0,sum_pos);
01867             sum_pos -= alpha[i];
01868         }
01869         else
01870         {
01871             alpha[i] = CMath::min(1.0,sum_neg);
01872             sum_neg -= alpha[i];
01873         }
01874 
01875     float64_t *zeros = SG_MALLOC(float64_t, l);
01876 
01877     for(i=0;i<l;i++)
01878         zeros[i] = 0;
01879 
01880     Solver_NU s;
01881     s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01882         alpha, 1.0, 1.0, param->eps, si,  param->shrinking, param->use_bias);
01883     float64_t r = si->r;
01884 
01885     SG_SINFO("C = %f\n",1/r);
01886 
01887     for(i=0;i<l;i++)
01888         alpha[i] *= y[i]/r;
01889 
01890     si->rho /= r;
01891     si->obj /= (r*r);
01892     si->upper_bound_p = 1/r;
01893     si->upper_bound_n = 1/r;
01894 
01895     SG_FREE(y);
01896     SG_FREE(zeros);
01897 }
01898 
01899 static void solve_nu_multiclass_svc(const svm_problem *prob,
01900         const svm_parameter *param, Solver::SolutionInfo* si, svm_model* model)
01901 {
01902     int32_t l = prob->l;
01903     float64_t nu = param->nu;
01904 
01905     float64_t *alpha = SG_MALLOC(float64_t, prob->l);
01906     schar *y = SG_MALLOC(schar, l);
01907 
01908     for(int32_t i=0;i<l;i++)
01909     {
01910         alpha[i] = 0;
01911         y[i]=prob->y[i];
01912     }
01913 
01914     int32_t nr_class=param->nr_class;
01915     float64_t* sum_class = SG_MALLOC(float64_t, nr_class);
01916 
01917     for (int32_t j=0; j<nr_class; j++)
01918         sum_class[j] = nu*l/nr_class;
01919 
01920     for(int32_t i=0;i<l;i++)
01921     {
01922         alpha[i] = CMath::min(1.0,sum_class[int32_t(y[i])]);
01923         sum_class[int32_t(y[i])] -= alpha[i];
01924     }
01925     SG_FREE(sum_class);
01926 
01927 
01928     float64_t *zeros = SG_MALLOC(float64_t, l);
01929 
01930     for (int32_t i=0;i<l;i++)
01931         zeros[i] = 0;
01932 
01933     Solver_NUMC s(nr_class, nu);
01934     SVC_QMC Q(*prob,*param,y, nr_class, ((float64_t) nr_class)/CMath::sq(nu*l));
01935 
01936     s.Solve(l, Q, zeros, y,
01937         alpha, 1.0, 1.0, param->eps, si,  param->shrinking, param->use_bias);
01938 
01939 
01940     int32_t* class_sv_count=SG_MALLOC(int32_t, nr_class);
01941 
01942     for (int32_t i=0; i<nr_class; i++)
01943         class_sv_count[i]=0;
01944 
01945     for (int32_t i=0; i<l; i++)
01946     {
01947         if (CMath::abs(alpha[i]) > 0)
01948             class_sv_count[(int32_t) y[i]]++;
01949     }
01950 
01951     model->l=l;
01952     // rho[0] = rho in the mcsvm paper; rho[1]...rho[nr_class] are the biases in the mcsvm paper
01953     model->rho = SG_MALLOC(float64_t, nr_class+1);
01954     model->nr_class = nr_class;
01955     model->label = NULL;
01956     model->SV = SG_MALLOC(svm_node*,nr_class);
01957     model->nSV = SG_MALLOC(int32_t, nr_class);
01958     model->sv_coef = SG_MALLOC(float64_t *,nr_class);
01959     model->normwcw = SG_MALLOC(float64_t,nr_class);
01960 
01961     for (int32_t i=0; i<nr_class+1; i++)
01962         model->rho[i]=0;
01963 
01964     model->objective = si->obj;
01965 
01966     if (param->use_bias)
01967     {
01968         SG_SDEBUG("Computing biases and primal objective\n");
01969         float64_t primal = s.compute_primal(y, alpha, model->rho, model->normwcw);
01970         SG_SINFO("Primal = %10.10f\n", primal);
01971     }
01972 
01973     for (int32_t i=0; i<nr_class; i++)
01974     {
01975         model->nSV[i]=class_sv_count[i];
01976         model->SV[i] = SG_MALLOC(svm_node,class_sv_count[i]);
01977         model->sv_coef[i] = SG_MALLOC(float64_t,class_sv_count[i]);
01978         class_sv_count[i]=0;
01979     }
01980 
01981     for (int32_t i=0;i<prob->l;i++)
01982     {
01983         if(fabs(alpha[i]) > 0)
01984         {
01985             model->SV[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]].index = prob->x[i]->index;
01986             model->sv_coef[(int32_t) y[i]][class_sv_count[(int32_t) y[i]]] = alpha[i];
01987             class_sv_count[(int32_t) y[i]]++;
01988         }
01989     }
01990 
01991     SG_FREE(y);
01992     SG_FREE(zeros);
01993     SG_FREE(alpha);
01994 }
01995 
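// one-class SVM: the equality constraint requires sum(alpha) = nu*l, so the
// first floor(nu*l) coefficients start at the upper bound 1, one coefficient
// takes the fractional remainder, and the remaining ones start at 0.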
01996 static void solve_one_class(
01997     const svm_problem *prob, const svm_parameter *param,
01998     float64_t *alpha, Solver::SolutionInfo* si)
01999 {
02000     int32_t l = prob->l;
02001     float64_t *zeros = SG_MALLOC(float64_t, l);
02002     schar *ones = SG_MALLOC(schar, l);
02003     int32_t i;
02004 
02005     int32_t n = (int32_t)(param->nu*prob->l);   // # of alpha's at upper bound
02006 
02007     for(i=0;i<n;i++)
02008         alpha[i] = 1;
02009     if(n<prob->l)
02010         alpha[n] = param->nu * prob->l - n;
02011     for(i=n+1;i<l;i++)
02012         alpha[i] = 0;
02013 
02014     for(i=0;i<l;i++)
02015     {
02016         zeros[i] = 0;
02017         ones[i] = 1;
02018     }
02019 
02020     Solver s;
02021     s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
02022         alpha, 1.0, 1.0, param->eps, si, param->shrinking, param->use_bias);
02023 
02024     SG_FREE(zeros);
02025     SG_FREE(ones);
02026 }
02027 
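// epsilon-SVR: solved as a 2*l-variable problem via SVR_Q.  The first half
// holds alpha, the second half alpha^*, with linear terms p - y_i and p + y_i
// and labels +1/-1 respectively; the final coefficient of each point is
// alpha[i] = alpha2[i] - alpha2[i+l].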
02028 static void solve_epsilon_svr(
02029     const svm_problem *prob, const svm_parameter *param,
02030     float64_t *alpha, Solver::SolutionInfo* si)
02031 {
02032     int32_t l = prob->l;
02033     float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
02034     float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
02035     schar *y = SG_MALLOC(schar, 2*l);
02036     int32_t i;
02037 
02038     for(i=0;i<l;i++)
02039     {
02040         alpha2[i] = 0;
02041         linear_term[i] = param->p - prob->y[i];
02042         y[i] = 1;
02043 
02044         alpha2[i+l] = 0;
02045         linear_term[i+l] = param->p + prob->y[i];
02046         y[i+l] = -1;
02047     }
02048 
02049     Solver s;
02050     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02051         alpha2, param->C, param->C, param->eps, si, param->shrinking, param->use_bias);
02052 
02053     float64_t sum_alpha = 0;
02054     for(i=0;i<l;i++)
02055     {
02056         alpha[i] = alpha2[i] - alpha2[i+l];
02057         sum_alpha += fabs(alpha[i]);
02058     }
02059     SG_SINFO("nu = %f\n",sum_alpha/(param->C*l));
02060 
02061     SG_FREE(alpha2);
02062     SG_FREE(linear_term);
02063     SG_FREE(y);
02064 }
02065 
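// nu-SVR: same 2*l-variable layout as epsilon-SVR, but the initial weight
// C*nu*l/2 is spread over alpha_i = alpha_i^* (clipped at C), the linear term
// is simply -y_i / +y_i, and the tube width epsilon is recovered from the
// solver as -si->r.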
02066 static void solve_nu_svr(
02067     const svm_problem *prob, const svm_parameter *param,
02068     float64_t *alpha, Solver::SolutionInfo* si)
02069 {
02070     int32_t l = prob->l;
02071     float64_t C = param->C;
02072     float64_t *alpha2 = SG_MALLOC(float64_t, 2*l);
02073     float64_t *linear_term = SG_MALLOC(float64_t, 2*l);
02074     schar *y = SG_MALLOC(schar, 2*l);
02075     int32_t i;
02076 
02077     float64_t sum = C * param->nu * l / 2;
02078     for(i=0;i<l;i++)
02079     {
02080         alpha2[i] = alpha2[i+l] = CMath::min(sum,C);
02081         sum -= alpha2[i];
02082 
02083         linear_term[i] = - prob->y[i];
02084         y[i] = 1;
02085 
02086         linear_term[i+l] = prob->y[i];
02087         y[i+l] = -1;
02088     }
02089 
02090     Solver_NU s;
02091     s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
02092         alpha2, C, C, param->eps, si, param->shrinking, param->use_bias);
02093 
02094     SG_SINFO("epsilon = %f\n",-si->r);
02095 
02096     for(i=0;i<l;i++)
02097         alpha[i] = alpha2[i] - alpha2[i+l];
02098 
02099     SG_FREE(alpha2);
02100     SG_FREE(linear_term);
02101     SG_FREE(y);
02102 }
02103 
02104 //
02105 // decision_function
02106 //
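// Each trained machine is summarized by its dual coefficients alpha, the
// offset rho and the objective value; prediction then takes the usual SVM
// form sum_i alpha_i*K(x_i,x) - rho (with a sign() on top for classification).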
02107 struct decision_function
02108 {
02109     float64_t *alpha;
02110     float64_t rho;
02111     float64_t objective;
02112 };
02113 
02114 decision_function svm_train_one(
02115     const svm_problem *prob, const svm_parameter *param,
02116     float64_t Cp, float64_t Cn)
02117 {
02118     float64_t *alpha = SG_MALLOC(float64_t, prob->l);
02119     Solver::SolutionInfo si;
02120     switch(param->svm_type)
02121     {
02122         case C_SVC:
02123             solve_c_svc(prob,param,alpha,&si,Cp,Cn);
02124             break;
02125         case NU_SVC:
02126             solve_nu_svc(prob,param,alpha,&si);
02127             break;
02128         case ONE_CLASS:
02129             solve_one_class(prob,param,alpha,&si);
02130             break;
02131         case EPSILON_SVR:
02132             solve_epsilon_svr(prob,param,alpha,&si);
02133             break;
02134         case NU_SVR:
02135             solve_nu_svr(prob,param,alpha,&si);
02136             break;
02137     }
02138 
02139     SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
02140 
02141     // output SVs
02142     if (param->svm_type != ONE_CLASS)
02143     {
02144         int32_t nSV = 0;
02145         int32_t nBSV = 0;
02146         for(int32_t i=0;i<prob->l;i++)
02147         {
02148             if(fabs(alpha[i]) > 0)
02149             {
02150                 ++nSV;
02151                 if(prob->y[i] > 0)
02152                 {
02153                     if(fabs(alpha[i]) >= si.upper_bound_p)
02154                         ++nBSV;
02155                 }
02156                 else
02157                 {
02158                     if(fabs(alpha[i]) >= si.upper_bound_n)
02159                         ++nBSV;
02160                 }
02161             }
02162         }
02163         SG_SINFO("nSV = %d, nBSV = %d\n",nSV,nBSV);
02164     }
02165 
02166     decision_function f;
02167     f.alpha = alpha;
02168     f.rho = si.rho;
02169     f.objective=si.obj;
02170     return f;
02171 }
02172 
02173 //
02174 // label: label name, start: start index of each class, count: #data of each class, perm: indices to the original data
02175 // perm, length l, must be allocated before calling this subroutine
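// illustrative example: for y = {2, 1, 2, 1} this yields nr_class = 2,
// label = {2, 1}, count = {2, 2}, start = {0, 2} and perm = {0, 2, 1, 3},
// i.e. perm lists the indices of class 2 first, then those of class 1.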
02176 void svm_group_classes(
02177     const svm_problem *prob, int32_t *nr_class_ret, int32_t **label_ret,
02178     int32_t **start_ret, int32_t **count_ret, int32_t *perm)
02179 {
02180     int32_t l = prob->l;
02181     int32_t max_nr_class = 16;
02182     int32_t nr_class = 0;
02183     int32_t *label = SG_MALLOC(int32_t, max_nr_class);
02184     int32_t *count = SG_MALLOC(int32_t, max_nr_class);
02185     int32_t *data_label = SG_MALLOC(int32_t, l);
02186     int32_t i;
02187 
02188     for(i=0;i<l;i++)
02189     {
02190         int32_t this_label=(int32_t) prob->y[i];
02191         int32_t j;
02192         for(j=0;j<nr_class;j++)
02193         {
02194             if(this_label == label[j])
02195             {
02196                 ++count[j];
02197                 break;
02198             }
02199         }
02200         data_label[i] = j;
02201         if(j == nr_class)
02202         {
02203             if(nr_class == max_nr_class)
02204             {
02205                 max_nr_class *= 2;
02206                 label=SG_REALLOC(int32_t, label,max_nr_class);
02207                 count=SG_REALLOC(int32_t, count,max_nr_class);
02208             }
02209             label[nr_class] = this_label;
02210             count[nr_class] = 1;
02211             ++nr_class;
02212         }
02213     }
02214 
02215     int32_t *start = SG_MALLOC(int32_t, nr_class);
02216     start[0] = 0;
02217     for(i=1;i<nr_class;i++)
02218         start[i] = start[i-1]+count[i-1];
02219     for(i=0;i<l;i++)
02220     {
02221         perm[start[data_label[i]]] = i;
02222         ++start[data_label[i]];
02223     }
02224     start[0] = 0;
02225     for(i=1;i<nr_class;i++)
02226         start[i] = start[i-1]+count[i-1];
02227 
02228     *nr_class_ret = nr_class;
02229     *label_ret = label;
02230     *start_ret = start;
02231     *count_ret = count;
02232     SG_FREE(data_label);
02233 }
02234 
02235 //
02236 // Interface functions
02237 //
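// svm_train dispatches on param->svm_type: one-class and regression problems
// train a single machine, NU_MULTICLASS_SVC goes to the dedicated multi-class
// solver, and ordinary classification is trained one-vs-one over all
// k*(k-1)/2 class pairs.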
02238 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02239 {
02240     svm_model *model = SG_MALLOC(svm_model,1);
02241     model->param = *param;
02242     model->free_sv = 0; // XXX
02243 
02244     if(param->svm_type == ONE_CLASS ||
02245        param->svm_type == EPSILON_SVR ||
02246        param->svm_type == NU_SVR)
02247     {
02248         SG_SINFO("training one-class svm or doing sv regression\n");
02249 
02250         // regression or one-class-svm
02251         model->nr_class = 2;
02252         model->label = NULL;
02253         model->nSV = NULL;
02254         model->sv_coef = SG_MALLOC(float64_t *,1);
02255         decision_function f = svm_train_one(prob,param,0,0);
02256         model->rho = SG_MALLOC(float64_t, 1);
02257         model->rho[0] = f.rho;
02258         model->objective = f.objective;
02259 
02260         int32_t nSV = 0;
02261         int32_t i;
02262         for(i=0;i<prob->l;i++)
02263             if(fabs(f.alpha[i]) > 0) ++nSV;
02264         model->l = nSV;
02265         model->SV = SG_MALLOC(svm_node *,nSV);
02266         model->sv_coef[0] = SG_MALLOC(float64_t, nSV);
02267         int32_t j = 0;
02268         for(i=0;i<prob->l;i++)
02269             if(fabs(f.alpha[i]) > 0)
02270             {
02271                 model->SV[j] = prob->x[i];
02272                 model->sv_coef[0][j] = f.alpha[i];
02273                 ++j;
02274             }
02275 
02276         SG_FREE(f.alpha);
02277     }
02278     else if(param->svm_type == NU_MULTICLASS_SVC)
02279     {
02280         Solver::SolutionInfo si;
02281         solve_nu_multiclass_svc(prob,param,&si,model);
02282         SG_SINFO("obj = %.16f, rho = %.16f\n",si.obj,si.rho);
02283     }
02284     else
02285     {
02286         // classification
02287         int32_t l = prob->l;
02288         int32_t nr_class;
02289         int32_t *label = NULL;
02290         int32_t *start = NULL;
02291         int32_t *count = NULL;
02292         int32_t *perm = SG_MALLOC(int32_t, l);
02293 
02294         // group training data of the same class
02295         svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02296         svm_node **x = SG_MALLOC(svm_node *,l);
02297         float64_t *C = SG_MALLOC(float64_t,l);
02298         float64_t *pv = SG_MALLOC(float64_t,l);
02299 
02300 
02301         int32_t i;
02302         for(i=0;i<l;i++) {
02303             x[i] = prob->x[perm[i]];
02304             C[i] = prob->C[perm[i]];
02305 
02306             if (prob->pv)
02307             {
02308                 pv[i] = prob->pv[perm[i]];
02309             }
02310             else
02311             {
02312                 //no custom linear term is set
02313                 pv[i] = -1.0;
02314             }
02315 
02316         }
02317 
02318 
02319         // calculate weighted C
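        // every class starts with the global C; a per-class weight from
        // param->weight[] multiplies it, e.g. C = 1 with weight 10 on some
        // label gives that class an effective C of 10.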
02320         float64_t *weighted_C = SG_MALLOC(float64_t, nr_class);
02321         for(i=0;i<nr_class;i++)
02322             weighted_C[i] = param->C;
02323         for(i=0;i<param->nr_weight;i++)
02324         {
02325             int32_t j;
02326             for(j=0;j<nr_class;j++)
02327                 if(param->weight_label[i] == label[j])
02328                     break;
02329             if(j == nr_class)
02330                 SG_SWARNING("class label %d specified in weight is not found\n", param->weight_label[i]);
02331             else
02332                 weighted_C[j] *= param->weight[i];
02333         }
02334 
02335         // train k*(k-1)/2 models
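        // one-vs-one training: for every pair (i,j) a binary sub-problem is
        // assembled with class i relabeled +1 and class j relabeled -1
        // (carrying over the per-example C and linear-term values), so 3
        // classes give 3 binary machines, 4 classes give 6, and so on.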
02336 
02337         bool *nonzero = SG_MALLOC(bool,l);
02338         for(i=0;i<l;i++)
02339             nonzero[i] = false;
02340         decision_function *f = SG_MALLOC(decision_function,nr_class*(nr_class-1)/2);
02341 
02342         int32_t p = 0;
02343         for(i=0;i<nr_class;i++)
02344             for(int32_t j=i+1;j<nr_class;j++)
02345             {
02346                 svm_problem sub_prob;
02347                 int32_t si = start[i], sj = start[j];
02348                 int32_t ci = count[i], cj = count[j];
02349                 sub_prob.l = ci+cj;
02350                 sub_prob.x = SG_MALLOC(svm_node *,sub_prob.l);
02351                 sub_prob.y = SG_MALLOC(float64_t,sub_prob.l+1); //dirty hack to suppress valgrind err
02352                 sub_prob.C = SG_MALLOC(float64_t,sub_prob.l+1);
02353                 sub_prob.pv = SG_MALLOC(float64_t,sub_prob.l+1);
02354 
02355                 int32_t k;
02356                 for(k=0;k<ci;k++)
02357                 {
02358                     sub_prob.x[k] = x[si+k];
02359                     sub_prob.y[k] = +1;
02360                     sub_prob.C[k] = C[si+k];
02361                     sub_prob.pv[k] = pv[si+k];
02362 
02363                 }
02364                 for(k=0;k<cj;k++)
02365                 {
02366                     sub_prob.x[ci+k] = x[sj+k];
02367                     sub_prob.y[ci+k] = -1;
02368                     sub_prob.C[ci+k] = C[sj+k];
02369                     sub_prob.pv[ci+k] = pv[sj+k];
02370                 }
02371                 sub_prob.y[sub_prob.l]=-1; //dirty hack to suppress valgrind err
02372                 sub_prob.C[sub_prob.l]=-1;
02373                 sub_prob.pv[sub_prob.l]=-1;
02374 
02375                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02376                 for(k=0;k<ci;k++)
02377                     if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02378                         nonzero[si+k] = true;
02379                 for(k=0;k<cj;k++)
02380                     if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02381                         nonzero[sj+k] = true;
02382                 SG_FREE(sub_prob.x);
02383                 SG_FREE(sub_prob.y);
02384                 SG_FREE(sub_prob.C);
02385                 SG_FREE(sub_prob.pv);
02386                 ++p;
02387             }
02388 
02389         // build output
02390 
02391         model->objective = f[0].objective;
02392         model->nr_class = nr_class;
02393 
02394         model->label = SG_MALLOC(int32_t, nr_class);
02395         for(i=0;i<nr_class;i++)
02396             model->label[i] = label[i];
02397 
02398         model->rho = SG_MALLOC(float64_t, nr_class*(nr_class-1)/2);
02399         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02400             model->rho[i] = f[i].rho;
02401 
02402         int32_t total_sv = 0;
02403         int32_t *nz_count = SG_MALLOC(int32_t, nr_class);
02404         model->nSV = SG_MALLOC(int32_t, nr_class);
02405         for(i=0;i<nr_class;i++)
02406         {
02407             int32_t nSV = 0;
02408             for(int32_t j=0;j<count[i];j++)
02409                 if(nonzero[start[i]+j])
02410                 {
02411                     ++nSV;
02412                     ++total_sv;
02413                 }
02414             model->nSV[i] = nSV;
02415             nz_count[i] = nSV;
02416         }
02417 
02418         SG_SINFO("Total nSV = %d\n",total_sv);
02419 
02420         model->l = total_sv;
02421         model->SV = SG_MALLOC(svm_node *,total_sv);
02422         p = 0;
02423         for(i=0;i<l;i++)
02424             if(nonzero[i]) model->SV[p++] = x[i];
02425 
02426         int32_t *nz_start = SG_MALLOC(int32_t, nr_class);
02427         nz_start[0] = 0;
02428         for(i=1;i<nr_class;i++)
02429             nz_start[i] = nz_start[i-1]+nz_count[i-1];
02430 
02431         model->sv_coef = SG_MALLOC(float64_t *,nr_class-1);
02432         for(i=0;i<nr_class-1;i++)
02433             model->sv_coef[i] = SG_MALLOC(float64_t, total_sv);
02434 
02435         p = 0;
02436         for(i=0;i<nr_class;i++)
02437             for(int32_t j=i+1;j<nr_class;j++)
02438             {
02439                 // classifier (i,j): coefficients with
02440                 // i are in sv_coef[j-1][nz_start[i]...],
02441                 // j are in sv_coef[i][nz_start[j]...]
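                // e.g. with 3 classes: pair (0,1) fills sv_coef[0] for both
                // sides, pair (0,2) fills sv_coef[1] (class-0 side) and
                // sv_coef[0] (class-2 side), and pair (1,2) fills sv_coef[1]
                // for both sides; every SV ends up with nr_class-1 coefficients.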
02442 
02443                 int32_t si = start[i];
02444                 int32_t sj = start[j];
02445                 int32_t ci = count[i];
02446                 int32_t cj = count[j];
02447 
02448                 int32_t q = nz_start[i];
02449                 int32_t k;
02450                 for(k=0;k<ci;k++)
02451                     if(nonzero[si+k])
02452                         model->sv_coef[j-1][q++] = f[p].alpha[k];
02453                 q = nz_start[j];
02454                 for(k=0;k<cj;k++)
02455                     if(nonzero[sj+k])
02456                         model->sv_coef[i][q++] = f[p].alpha[ci+k];
02457                 ++p;
02458             }
02459 
02460         SG_FREE(label);
02461         SG_FREE(count);
02462         SG_FREE(perm);
02463         SG_FREE(start);
02464         SG_FREE(x);
02465         SG_FREE(C);
02466         SG_FREE(pv);
02467         SG_FREE(weighted_C);
02468         SG_FREE(nonzero);
02469         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02470             SG_FREE(f[i].alpha);
02471         SG_FREE(f);
02472         SG_FREE(nz_count);
02473         SG_FREE(nz_start);
02474     }
02475     return model;
02476 }
02477 
02478 void svm_destroy_model(svm_model* model)
02479 {
02480     if(model->free_sv && model->l > 0)
02481         SG_FREE((void *)(model->SV[0]));
02482     for(int32_t i=0;i<model->nr_class-1;i++)
02483         SG_FREE(model->sv_coef[i]);
02484     SG_FREE(model->SV);
02485     SG_FREE(model->sv_coef);
02486     SG_FREE(model->rho);
02487     SG_FREE(model->label);
02488     SG_FREE(model->nSV);
02489     SG_FREE(model);
02490 }
02491 
02492 void svm_destroy_param(svm_parameter* param)
02493 {
02494     SG_FREE(param->weight_label);
02495     SG_FREE(param->weight);
02496 }
02497 
02498 const char *svm_check_parameter(
02499     const svm_problem *prob, const svm_parameter *param)
02500 {
02501     // svm_type
02502 
02503     int32_t svm_type = param->svm_type;
02504     if(svm_type != C_SVC &&
02505        svm_type != NU_SVC &&
02506        svm_type != ONE_CLASS &&
02507        svm_type != EPSILON_SVR &&
02508        svm_type != NU_SVR &&
02509        svm_type != NU_MULTICLASS_SVC)
02510         return "unknown svm type";
02511 
02512     // kernel_type, degree
02513 
02514     int32_t kernel_type = param->kernel_type;
02515     if(kernel_type != LINEAR &&
02516        kernel_type != POLY &&
02517        kernel_type != RBF &&
02518        kernel_type != SIGMOID &&
02519        kernel_type != PRECOMPUTED)
02520         return "unknown kernel type";
02521 
02522     if(param->degree < 0)
02523         return "degree of polynomial kernel < 0";
02524 
02525     // cache_size,eps,C,nu,p,shrinking
02526 
02527     if(param->cache_size <= 0)
02528         return "cache_size <= 0";
02529 
02530     if(param->eps <= 0)
02531         return "eps <= 0";
02532 
02533     if(svm_type == C_SVC ||
02534        svm_type == EPSILON_SVR ||
02535        svm_type == NU_SVR)
02536         if(param->C <= 0)
02537             return "C <= 0";
02538 
02539     if(svm_type == NU_SVC ||
02540        svm_type == ONE_CLASS ||
02541        svm_type == NU_SVR)
02542         if(param->nu <= 0 || param->nu > 1)
02543             return "nu <= 0 or nu > 1";
02544 
02545     if(svm_type == EPSILON_SVR)
02546         if(param->p < 0)
02547             return "p < 0";
02548 
02549     if(param->shrinking != 0 &&
02550        param->shrinking != 1)
02551         return "shrinking != 0 and shrinking != 1";
02552 
02553 
02554     // check whether nu-svc is feasible
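    // for every pair of classes with n1 and n2 examples the nu-SVC
    // constraints can only be satisfied when nu <= 2*min(n1,n2)/(n1+n2);
    // the loop below rejects any nu that violates this for some pair.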
02555 
02556     if(svm_type == NU_SVC)
02557     {
02558         int32_t l = prob->l;
02559         int32_t max_nr_class = 16;
02560         int32_t nr_class = 0;
02561         int32_t *label = SG_MALLOC(int32_t, max_nr_class);
02562         int32_t *count = SG_MALLOC(int32_t, max_nr_class);
02563 
02564         int32_t i;
02565         for(i=0;i<l;i++)
02566         {
02567             int32_t this_label = (int32_t) prob->y[i];
02568             int32_t j;
02569             for(j=0;j<nr_class;j++)
02570                 if(this_label == label[j])
02571                 {
02572                     ++count[j];
02573                     break;
02574                 }
02575             if(j == nr_class)
02576             {
02577                 if(nr_class == max_nr_class)
02578                 {
02579                     max_nr_class *= 2;
02580                     label=SG_REALLOC(int32_t, label, max_nr_class);
02581                     count=SG_REALLOC(int32_t, count, max_nr_class);
02582                 }
02583                 label[nr_class] = this_label;
02584                 count[nr_class] = 1;
02585                 ++nr_class;
02586             }
02587         }
02588 
02589         for(i=0;i<nr_class;i++)
02590         {
02591             int32_t n1 = count[i];
02592             for(int32_t j=i+1;j<nr_class;j++)
02593             {
02594                 int32_t n2 = count[j];
02595                 if(param->nu*(n1+n2)/2 > CMath::min(n1,n2))
02596                 {
02597                     SG_FREE(label);
02598                     SG_FREE(count);
02599                     return "specified nu is infeasible";
02600                 }
02601             }
02602         }
02603         SG_FREE(label);
02604         SG_FREE(count);
02605     }
02606 
02607     return NULL;
02608 }
02609 }
02610 #endif // DOXYGEN_SHOULD_SKIP_THIS