00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #ifndef _SVMLight_H___
00023 #define _SVMLight_H___
00024
00025 #include "lib/config.h"
00026
00027 #ifdef USE_SVMLIGHT
00028 #include "classifier/svm/SVM.h"
00029 #include "kernel/Kernel.h"
00030 #include "lib/Mathematics.h"
00031 #include "lib/common.h"
00032
00033 #include <stdio.h>
00034 #include <ctype.h>
00035 #include <string.h>
00036 #include <stdlib.h>
00037 #include <time.h>
00038
00039 namespace shogun
00040 {
/** Version string of the wrapped SVM-light code (the "correct??" marker is part of the literal). */
#define VERSION "V3.50 -- correct??"
/** Release date string belonging to VERSION (the "correct??" marker is part of the literal). */
#define VERSION_DATE "01.11.00 -- correct??"

/** Default numeric precision constant; presumably the solver's default tolerance -- confirm against the implementation. */
#define DEF_PRECISION 1E-14
/** Upper limit named MAXSHRINK; presumably caps shrinking bookkeeping -- confirm against the implementation. */
#define MAXSHRINK 50000
00046
00047 #ifndef DOXYGEN_SHOULD_SKIP_THIS
00048
00049 struct MODEL {
00051 int32_t sv_num;
00053 int32_t at_upper_bound;
00055 float64_t b;
00057 int32_t* supvec;
00059 float64_t *alpha;
00061 int32_t *index;
00063 int32_t totdoc;
00065 CKernel* kernel;
00066
00067
00069 float64_t loo_error;
00071 float64_t loo_recall;
00073 float64_t loo_precision;
00074
00076 float64_t xa_error;
00078 float64_t xa_recall;
00080 float64_t xa_precision;
00081 };
00082
00084 typedef struct quadratic_program {
00086 int32_t opt_n;
00088 int32_t opt_m;
00090 float64_t *opt_ce;
00092 float64_t *opt_ce0;
00094 float64_t *opt_g;
00096 float64_t *opt_g0;
00098 float64_t *opt_xinit;
00100 float64_t *opt_low;
00102 float64_t *opt_up;
00103 } QP;
00104
/** Alias of int32_t; the name suggests a feature number -- confirm against its users. */
typedef int32_t FNUM;
00107
00109 typedef float64_t FVAL;
00110
00112 struct LEARN_PARM {
00114 int32_t type;
00116 float64_t svm_c;
00118 float64_t* eps;
00120 float64_t svm_costratio;
00122 float64_t transduction_posratio;
00123
00125 int32_t biased_hyperplane;
00130 int32_t sharedslack;
00132 int32_t svm_maxqpsize;
00134 int32_t svm_newvarsinqp;
00136 int32_t kernel_cache_size;
00138 float64_t epsilon_crit;
00140 float64_t epsilon_shrink;
00142 int32_t svm_iter_to_shrink;
00146 int32_t maxiter;
00148 int32_t remove_inconsistent;
00152 int32_t skip_final_opt_check;
00154 int32_t compute_loo;
00158 float64_t rho;
00162 int32_t xa_depth;
00164 char predfile[200];
00168 char alphafile[200];
00169
00170
00172 float64_t epsilon_const;
00174 float64_t epsilon_a;
00176 float64_t opt_precision;
00177
00178
00180 int32_t svm_c_steps;
00182 float64_t svm_c_factor;
00184 float64_t svm_costratio_unlab;
00186 float64_t svm_unlabbound;
00188 float64_t *svm_cost;
00189 };
00190
/**
 * Timing profile: seven int32_t counters, one per solver phase.
 *
 * NOTE(review): the time unit and who updates these counters are not visible
 * in this header -- confirm against the implementation.
 */
struct TIMING {
	/** counter for the kernel phase */
	int32_t time_kernel;
	/** counter for the optimization phase */
	int32_t time_opti;
	/** counter for the shrinking phase */
	int32_t time_shrink;
	/** counter for the update phase */
	int32_t time_update;
	/** counter for the model phase */
	int32_t time_model;
	/** counter for the check phase */
	int32_t time_check;
	/** counter for the selection phase */
	int32_t time_select;
};
00208
00209
00211 struct SHRINK_STATE
00212 {
00214 int32_t *active;
00216 int32_t *inactive_since;
00218 int32_t deactnum;
00220 float64_t **a_history;
00222 int32_t maxhistory;
00224 float64_t *last_a;
00226 float64_t *last_lin;
00227 };
00228 #endif // DOXYGEN_SHOULD_SKIP_THIS
00229
00231 #define IGNORE_IN_CLASSLIST
00232 IGNORE_IN_CLASSLIST class CSVMLight : public CSVM
00233 {
00234 public:
00236 CSVMLight();
00237
00244 CSVMLight(float64_t C, CKernel* k, CLabels* lab);
00245 virtual ~CSVMLight();
00246
00248 void init();
00249
00258 virtual bool train(CFeatures* data=NULL);
00259
00264 virtual inline EClassifierType get_classifier_type() { return CT_LIGHT; }
00265
00270 int32_t get_runtime();
00271
00272
00274 void svm_learn();
00275
00292 int32_t optimize_to_convergence(
00293 int32_t* docs, int32_t* label, int32_t totdoc, SHRINK_STATE *shrink_state,
00294 int32_t *inconsistent, float64_t *a, float64_t *lin, float64_t *c,
00295 TIMING *timing_profile, float64_t *maxdiff, int32_t heldout,
00296 int32_t retrain);
00297
00308 virtual float64_t compute_objective_function(
00309 float64_t *a, float64_t *lin, float64_t *c, float64_t* eps, int32_t *label,
00310 int32_t totdoc);
00311
00316 void clear_index(int32_t *index);
00317
00323 void add_to_index(int32_t *index, int32_t elem);
00324
00332 int32_t compute_index(int32_t *binfeature, int32_t range, int32_t *index);
00333
00352 void optimize_svm(
00353 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00354 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t totdoc,
00355 int32_t *working2dnum, int32_t varnum, float64_t *a, float64_t *lin,
00356 float64_t *c, float64_t *aicache, QP *qp, float64_t *epsilon_crit_target);
00357
00375 void compute_matrices_for_optimization(
00376 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00377 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
00378 float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
00379 float64_t *aicache, QP *qp);
00380
00398 void compute_matrices_for_optimization_parallel(
00399 int32_t* docs, int32_t* label, int32_t *exclude_from_eq_const,
00400 float64_t eq_target, int32_t *chosen, int32_t *active2dnum, int32_t *key,
00401 float64_t *a, float64_t *lin, float64_t *c, int32_t varnum, int32_t totdoc,
00402 float64_t *aicache, QP *qp);
00403
00416 int32_t calculate_svm_model(
00417 int32_t* docs, int32_t *label,float64_t *lin, float64_t *a,
00418 float64_t* a_old, float64_t *c, int32_t *working2dnum, int32_t *active2dnum);
00419
00436 int32_t check_optimality(
00437 int32_t *label, float64_t *a, float64_t* lin, float64_t *c, int32_t totdoc,
00438 float64_t *maxdiff, float64_t epsilon_crit_org, int32_t *misclassified,
00439 int32_t *inconsistent,int32_t* active2dnum, int32_t *last_suboptimal_at,
00440 int32_t iteration);
00441
00455 virtual void update_linear_component(
00456 int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00457 float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00458 float64_t *aicache, float64_t* c);
00459
00464 static void* update_linear_component_mkl_linadd_helper(void* p);
00465
00478 void update_linear_component_mkl(
00479 int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00480 float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00481 float64_t *aicache);
00482
00495 void update_linear_component_mkl_linadd(
00496 int32_t* docs, int32_t *label, int32_t *active2dnum, float64_t *a,
00497 float64_t* a_old, int32_t *working2dnum, int32_t totdoc, float64_t *lin,
00498 float64_t *aicache);
00499
00500 void call_mkl_callback(float64_t* a, int32_t* label, float64_t* lin);
00501
00520 int32_t select_next_qp_subproblem_grad(
00521 int32_t *label, float64_t *a, float64_t* lin, float64_t* c, int32_t totdoc,
00522 int32_t qp_size, int32_t *inconsistent, int32_t* active2dnum,
00523 int32_t* working2dnum, float64_t *selcrit, int32_t *select,
00524 int32_t cache_only, int32_t *key, int32_t *chosen);
00525
00544 int32_t select_next_qp_subproblem_rand(
00545 int32_t* label, float64_t *a, float64_t *lin, float64_t *c,
00546 int32_t totdoc, int32_t qp_size, int32_t *inconsistent,
00547 int32_t *active2dnum, int32_t *working2dnum, float64_t *selcrit,
00548 int32_t *select, int32_t *key, int32_t *chosen, int32_t iteration);
00549
00557 void select_top_n(
00558 float64_t *selcrit, int32_t range, int32_t *select, int32_t n);
00559
00566 void init_shrink_state(
00567 SHRINK_STATE *shrink_state, int32_t totdoc, int32_t maxhistory);
00568
00573 void shrink_state_cleanup(SHRINK_STATE *shrink_state);
00574
00590 int32_t shrink_problem(
00591 SHRINK_STATE *shrink_state, int32_t *active2dnum,
00592 int32_t *last_suboptimal_at, int32_t iteration, int32_t totdoc,
00593 int32_t minshrink, float64_t *a, int32_t *inconsistent, float64_t* c,
00594 float64_t* lin, int* label);
00595
00610 virtual void reactivate_inactive_examples(
00611 int32_t *label,float64_t *a,SHRINK_STATE *shrink_state, float64_t *lin,
00612 float64_t *c, int32_t totdoc,int32_t iteration, int32_t *inconsistent,
00613 int32_t *docs,float64_t *aicache, float64_t* maxdiff);
00614
00615 protected:
00622 inline virtual float64_t compute_kernel(int32_t i, int32_t j)
00623 {
00624 return kernel->kernel(i, j);
00625 }
00626
00631 static void* compute_kernel_helper(void* p);
00632
00637 static void* update_linear_component_linadd_helper(void* p);
00638
00643 static void* reactivate_inactive_examples_vanilla_helper(void* p);
00644
00649 static void* reactivate_inactive_examples_linadd_helper(void* p);
00650
00652 inline virtual const char* get_name() const { return "SVMLight"; }
00653
00654
00655 float64_t *optimize_qp( QP *qp,float64_t *epsilon_crit, int32_t nx,
00656 float64_t *threshold, int32_t& svm_maxqpsize);
00657
00658 protected:
00660 MODEL* model;
00662 LEARN_PARM* learn_parm;
00664 int32_t verbosity;
00665
00667 float64_t init_margin;
00669 int32_t init_iter;
00671 int32_t precision_violations;
00673 float64_t model_b;
00675 float64_t opt_precision;
00677 float64_t* primal;
00679 float64_t* dual;
00680
00681
00682
00686 float64_t* W;
00688 int32_t count;
00690 float64_t mymaxdiff;
00692 bool use_kernel_cache;
00694 bool mkl_converged;
00695 };
00696 }
00697 #endif //USE_SVMLIGHT
00698 #endif //_SVMLight_H___