00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/ui/GUIClassifier.h>
00012 #include <shogun/ui/SGInterface.h>
00013
00014 #include <shogun/lib/config.h>
00015 #include <shogun/io/SGIO.h>
00016
00017 #include <shogun/features/SparseFeatures.h>
00018 #include <shogun/features/RealFileFeatures.h>
00019 #include <shogun/features/Labels.h>
00020
00021 #include <shogun/kernel/AUCKernel.h>
00022
00023 #include <shogun/classifier/KNN.h>
00024 #include <shogun/clustering/KMeans.h>
00025 #include <shogun/clustering/Hierarchical.h>
00026 #include <shogun/classifier/PluginEstimate.h>
00027
00028 #include <shogun/classifier/LDA.h>
00029 #include <shogun/classifier/LPM.h>
00030 #include <shogun/classifier/LPBoost.h>
00031 #include <shogun/classifier/Perceptron.h>
00032
00033 #include <shogun/machine/LinearMachine.h>
00034
00035 #ifdef USE_SVMLIGHT
00036 #include <shogun/classifier/svm/SVMLight.h>
00037 #include <shogun/classifier/svm/SVMLightOneClass.h>
00038 #include <shogun/regression/svr/SVRLight.h>
00039 #endif //USE_SVMLIGHT
00040
00041 #include <shogun/classifier/mkl/MKLClassification.h>
00042 #include <shogun/regression/svr/MKLRegression.h>
00043 #include <shogun/classifier/mkl/MKLOneClass.h>
00044 #include <shogun/classifier/mkl/MKLMultiClass.h>
00045 #include <shogun/classifier/svm/LibSVM.h>
00046 #include <shogun/classifier/svm/LaRank.h>
00047 #include <shogun/classifier/svm/GPBTSVM.h>
00048 #include <shogun/classifier/svm/LibSVMOneClass.h>
00049 #include <shogun/classifier/svm/LibSVMMultiClass.h>
00050
00051 #include <shogun/regression/svr/LibSVR.h>
00052 #include <shogun/regression/KRR.h>
00053
00054 #include <shogun/classifier/svm/LibLinear.h>
00055 #include <shogun/classifier/svm/MPDSVM.h>
00056 #include <shogun/classifier/svm/GNPPSVM.h>
00057 #include <shogun/classifier/svm/GMNPSVM.h>
00058 #include <shogun/classifier/svm/ScatterSVM.h>
00059
00060 #include <shogun/classifier/svm/SVMLin.h>
00061 #include <shogun/classifier/svm/SubGradientSVM.h>
00062 #include <shogun/classifier/SubGradientLPM.h>
00063 #include <shogun/classifier/svm/SVMOcas.h>
00064 #include <shogun/classifier/svm/SVMSGD.h>
00065 #include <shogun/classifier/svm/WDSVMOcas.h>
00066
00067 using namespace shogun;
00068
00069 CGUIClassifier::CGUIClassifier(CSGInterface* ui_)
00070 : CSGObject(), ui(ui_)
00071 {
00072 constraint_generator=NULL;
00073 classifier=NULL;
00074 max_train_time=0;
00075
00076
00077 perceptron_learnrate=0.1;
00078 perceptron_maxiter=1000;
00079
00080
00081 svm_qpsize=41;
00082 svm_bufsize=3000;
00083 svm_max_qpsize=1000;
00084 mkl_norm=1;
00085 ent_lambda=0;
00086 mkl_block_norm=4;
00087 svm_C1=1;
00088 svm_C2=1;
00089 C_mkl=0;
00090 mkl_use_interleaved=true;
00091 svm_weight_epsilon=1e-5;
00092 svm_epsilon=1e-5;
00093 svm_tube_epsilon=1e-2;
00094 svm_nu=0.5;
00095 svm_use_shrinking = true ;
00096
00097 svm_use_bias = true;
00098 svm_use_batch_computation = true ;
00099 svm_use_linadd = true ;
00100 svm_do_auc_maximization = false ;
00101
00102
00103 krr_tau=1;
00104
00105 solver_type=ST_AUTO;
00106 }
00107
/** Destructor: drops this object's references on the current classifier
 * and on the constraint generator via SG_UNREF.
 */
CGUIClassifier::~CGUIClassifier()
{
	SG_UNREF(classifier);
	SG_UNREF(constraint_generator);
}
00113
00114 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d)
00115 {
00116 if (strcmp(name,"LIBSVM_ONECLASS")==0)
00117 {
00118 SG_UNREF(classifier);
00119 classifier = new CLibSVMOneClass();
00120 SG_INFO("created SVMlibsvm object for oneclass\n");
00121 }
00122 else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
00123 {
00124 SG_UNREF(classifier);
00125 classifier = new CLibSVMMultiClass();
00126 SG_INFO("created SVMlibsvm object for multiclass\n");
00127 }
00128 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
00129 {
00130 SG_UNREF(classifier);
00131 classifier= new CLibSVMMultiClass(LIBSVM_NU_SVC);
00132 SG_INFO("created SVMlibsvm object for multiclass\n") ;
00133 }
00134 #ifdef USE_SVMLIGHT
00135 else if (strcmp(name,"SCATTERSVM_NO_BIAS_SVMLIGHT")==0)
00136 {
00137 SG_UNREF(classifier);
00138 classifier= new CScatterSVM(NO_BIAS_SVMLIGHT);
00139 SG_INFO("created ScatterSVM NO BIAS SVMLIGHT object\n") ;
00140 }
00141 #endif //USE_SVMLIGHT
00142 else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0)
00143 {
00144 SG_UNREF(classifier);
00145 classifier= new CScatterSVM(NO_BIAS_LIBSVM);
00146 SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ;
00147 }
00148 else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0)
00149 {
00150 SG_UNREF(classifier);
00151 classifier= new CScatterSVM(TEST_RULE1);
00152 SG_INFO("created ScatterSVM TESTRULE1 object\n") ;
00153 }
00154 else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0)
00155 {
00156 SG_UNREF(classifier);
00157 classifier= new CScatterSVM(TEST_RULE2);
00158 SG_INFO("created ScatterSVM TESTRULE2 object\n") ;
00159 }
00160 else if (strcmp(name,"LIBSVM_NU")==0)
00161 {
00162 SG_UNREF(classifier);
00163 classifier= new CLibSVM(LIBSVM_NU_SVC);
00164 SG_INFO("created SVMlibsvm object\n") ;
00165 }
00166 else if (strcmp(name,"LIBSVM")==0)
00167 {
00168 SG_UNREF(classifier);
00169 classifier= new CLibSVM();
00170 SG_INFO("created SVMlibsvm object\n") ;
00171 }
00172 else if (strcmp(name,"LARANK")==0)
00173 {
00174 SG_UNREF(classifier);
00175 classifier= new CLaRank();
00176 SG_INFO("created LaRank object\n") ;
00177 }
00178 #ifdef USE_SVMLIGHT
00179 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
00180 {
00181 SG_UNREF(classifier);
00182 classifier= new CSVMLight();
00183 SG_INFO("created SVMLight object\n") ;
00184 }
00185 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
00186 {
00187 SG_UNREF(classifier);
00188 classifier= new CSVMLightOneClass();
00189 SG_INFO("created SVMLightOneClass object\n") ;
00190 }
00191 else if (strcmp(name,"SVRLIGHT")==0)
00192 {
00193 SG_UNREF(classifier);
00194 classifier= new CSVRLight();
00195 SG_INFO("created SVRLight object\n") ;
00196 }
00197 #endif //USE_SVMLIGHT
00198 else if (strcmp(name,"GPBTSVM")==0)
00199 {
00200 SG_UNREF(classifier);
00201 classifier= new CGPBTSVM();
00202 SG_INFO("created GPBT-SVM object\n") ;
00203 }
00204 else if (strcmp(name,"MPDSVM")==0)
00205 {
00206 SG_UNREF(classifier);
00207 classifier= new CMPDSVM();
00208 SG_INFO("created MPD-SVM object\n") ;
00209 }
00210 else if (strcmp(name,"GNPPSVM")==0)
00211 {
00212 SG_UNREF(classifier);
00213 classifier= new CGNPPSVM();
00214 SG_INFO("created GNPP-SVM object\n") ;
00215 }
00216 else if (strcmp(name,"GMNPSVM")==0)
00217 {
00218 SG_UNREF(classifier);
00219 classifier= new CGMNPSVM();
00220 SG_INFO("created GMNP-SVM object\n") ;
00221 }
00222 else if (strcmp(name,"LIBSVR")==0)
00223 {
00224 SG_UNREF(classifier);
00225 classifier= new CLibSVR();
00226 SG_INFO("created SVRlibsvm object\n") ;
00227 }
00228 #ifdef HAVE_LAPACK
00229 else if (strcmp(name, "KRR")==0)
00230 {
00231 SG_UNREF(classifier);
00232 classifier=new CKRR(krr_tau, ui->ui_kernel->get_kernel(),
00233 ui->ui_labels->get_train_labels());
00234 SG_INFO("created KRR object %p\n", classifier);
00235 }
00236 #endif //HAVE_LAPACK
00237 else if (strcmp(name,"PERCEPTRON")==0)
00238 {
00239 SG_UNREF(classifier);
00240 classifier= new CPerceptron();
00241 SG_INFO("created Perceptron object\n") ;
00242 }
00243 #ifdef HAVE_LAPACK
00244 else if (strncmp(name,"LIBLINEAR",9)==0)
00245 {
00246 LIBLINEAR_SOLVER_TYPE st=L2R_LR;
00247
00248 if (strcmp(name,"LIBLINEAR_L2R_LR")==0)
00249 {
00250 st=L2R_LR;
00251 SG_INFO("created LibLinear l2 regularized logistic regression object\n") ;
00252 }
00253 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0)
00254 {
00255 st=L2R_L2LOSS_SVC_DUAL;
00256 SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ;
00257 }
00258 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0)
00259 {
00260 st=L2R_L2LOSS_SVC;
00261 SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ;
00262 }
00263 else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0)
00264 {
00265 st=L1R_L2LOSS_SVC;
00266 SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ;
00267 }
00268 else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0)
00269 {
00270 st=L2R_L1LOSS_SVC_DUAL;
00271 SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ;
00272 }
00273 else
00274 SG_ERROR("unknown liblinear type\n");
00275
00276 SG_UNREF(classifier);
00277 classifier= new CLibLinear(st);
00278 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00279 ((CLibLinear*) classifier)->set_epsilon(svm_epsilon);
00280 ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias);
00281 }
00282 else if (strcmp(name,"LDA")==0)
00283 {
00284 SG_UNREF(classifier);
00285 classifier= new CLDA();
00286 SG_INFO("created LDA object\n") ;
00287 }
00288 #endif //HAVE_LAPACK
00289 #ifdef USE_CPLEX
00290 else if (strcmp(name,"LPM")==0)
00291 {
00292 SG_UNREF(classifier);
00293 classifier= new CLPM();
00294 ((CLPM*) classifier)->set_C(svm_C1, svm_C2);
00295 ((CLPM*) classifier)->set_epsilon(svm_epsilon);
00296 ((CLPM*) classifier)->set_bias_enabled(svm_use_bias);
00297 ((CLPM*) classifier)->set_max_train_time(max_train_time);
00298 SG_INFO("created LPM object\n") ;
00299 }
00300 else if (strcmp(name,"LPBOOST")==0)
00301 {
00302 SG_UNREF(classifier);
00303 classifier= new CLPBoost();
00304 ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2);
00305 ((CLPBoost*) classifier)->set_epsilon(svm_epsilon);
00306 ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias);
00307 ((CLPBoost*) classifier)->set_max_train_time(max_train_time);
00308 SG_INFO("created LPBoost object\n") ;
00309 }
00310 else if (strcmp(name,"SUBGRADIENTLPM")==0)
00311 {
00312 SG_UNREF(classifier);
00313 classifier= new CSubGradientLPM();
00314
00315 ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias);
00316 ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize);
00317 ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize);
00318 ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2);
00319 ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon);
00320 ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time);
00321 SG_INFO("created Subgradient LPM object\n") ;
00322 }
00323 #endif //USE_CPLEX
00324 else if (strncmp(name,"KNN", strlen("KNN"))==0)
00325 {
00326 SG_UNREF(classifier);
00327 classifier= new CKNN();
00328 SG_INFO("created KNN object\n") ;
00329 }
00330 else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0)
00331 {
00332 SG_UNREF(classifier);
00333 classifier= new CKMeans();
00334 SG_INFO("created KMeans object\n") ;
00335 }
00336 else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0)
00337 {
00338 SG_UNREF(classifier);
00339 classifier= new CHierarchical();
00340 SG_INFO("created Hierarchical clustering object\n") ;
00341 }
00342 else if (strcmp(name,"SVMLIN")==0)
00343 {
00344 SG_UNREF(classifier);
00345 classifier= new CSVMLin();
00346 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00347 ((CSVMLin*) classifier)->set_epsilon(svm_epsilon);
00348 ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias);
00349 SG_INFO("created SVMLin object\n") ;
00350 }
00351 else if (strcmp(name,"SUBGRADIENTSVM")==0)
00352 {
00353 SG_UNREF(classifier);
00354 classifier= new CSubGradientSVM();
00355
00356 ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias);
00357 ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize);
00358 ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize);
00359 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00360 ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon);
00361 ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time);
00362 SG_INFO("created Subgradient SVM object\n") ;
00363 }
00364 else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0)
00365 {
00366 SG_UNREF(classifier);
00367 classifier= new CWDSVMOcas(SVM_OCAS);
00368
00369 ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00370 ((CWDSVMOcas*) classifier)->set_degree(d, from_d);
00371 ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00372 ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00373 ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00374 SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ;
00375 }
00376 else if (strcmp(name,"SVMOCAS")==0)
00377 {
00378 SG_UNREF(classifier);
00379 classifier= new CSVMOcas(SVM_OCAS);
00380
00381 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00382 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00383 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00384 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00385 SG_INFO("created SVM Ocas(OCAS) object\n") ;
00386 }
00387 else if (strcmp(name,"SVMSGD")==0)
00388 {
00389 SG_UNREF(classifier);
00390 classifier= new CSVMSGD(svm_C1);
00391 ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias);
00392 SG_INFO("created SVM SGD object\n") ;
00393 }
00394 else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0))
00395 {
00396 SG_UNREF(classifier);
00397 classifier= new CSVMOcas(SVM_BMRM);
00398
00399 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00400 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00401 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00402 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00403 SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ;
00404 }
00405 else if (strcmp(name,"MKL_CLASSIFICATION")==0)
00406 {
00407 SG_UNREF(classifier);
00408 classifier= new CMKLClassification();
00409 }
00410
00411
00412
00413
00414
00415 else if (strcmp(name,"MKL_ONECLASS")==0)
00416 {
00417 SG_UNREF(classifier);
00418 classifier= new CMKLOneClass();
00419 }
00420 else if (strcmp(name,"MKL_MULTICLASS")==0)
00421 {
00422 SG_UNREF(classifier);
00423 classifier= new CMKLMultiClass();
00424 }
00425 else if (strcmp(name,"MKL_REGRESSION")==0)
00426 {
00427 SG_UNREF(classifier);
00428 classifier= new CMKLRegression();
00429 }
00430 else
00431 {
00432 SG_ERROR("Unknown classifier %s.\n", name);
00433 return false;
00434 }
00435 SG_REF(classifier);
00436
00437 return (classifier!=NULL);
00438 }
00439
00440 bool CGUIClassifier::train_mkl_multiclass()
00441 {
00442 CMKLMultiClass* mkl= (CMKLMultiClass*) classifier;
00443 if (!mkl)
00444 SG_ERROR("No MKL available.\n");
00445
00446 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00447 if (!trainlabels)
00448 SG_ERROR("No trainlabels available.\n");
00449
00450 CKernel* kernel=ui->ui_kernel->get_kernel();
00451 if (!kernel)
00452 SG_ERROR("No kernel available.\n");
00453
00454 bool success=ui->ui_kernel->init_kernel("TRAIN");
00455
00456 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00457 SG_ERROR("Kernel not initialized / no train features available.\n");
00458
00459 int32_t num_vec=kernel->get_num_vec_lhs();
00460 if (trainlabels->get_num_labels() != num_vec)
00461 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00462
00463 SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00464
00465 mkl->set_mkl_epsilon(svm_weight_epsilon);
00466 mkl->set_mkl_norm(mkl_norm);
00467
00468 mkl->set_solver_type(solver_type);
00469 mkl->set_bias_enabled(svm_use_bias);
00470 mkl->set_epsilon(svm_epsilon);
00471 mkl->set_max_train_time(max_train_time);
00472 mkl->set_tube_epsilon(svm_tube_epsilon);
00473 mkl->set_nu(svm_nu);
00474 mkl->set_C(svm_C1, svm_C2);
00475 mkl->set_qpsize(svm_qpsize);
00476 mkl->set_shrinking_enabled(svm_use_shrinking);
00477 mkl->set_linadd_enabled(svm_use_linadd);
00478 mkl->set_batch_computation_enabled(svm_use_batch_computation);
00479
00480 ((CKernelMachine*) mkl)->set_labels(trainlabels);
00481 ((CKernelMachine*) mkl)->set_kernel(kernel);
00482
00483 return mkl->train();
00484 }
00485
00486 bool CGUIClassifier::train_mkl()
00487 {
00488 CMKL* mkl= (CMKL*) classifier;
00489 if (!mkl)
00490 SG_ERROR("No SVM available.\n");
00491
00492 bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS);
00493 CLabels* trainlabels=NULL;
00494 if(!oneclass)
00495 trainlabels=ui->ui_labels->get_train_labels();
00496 else
00497 SG_INFO("Training one class mkl.\n");
00498 if (!trainlabels && !oneclass)
00499 SG_ERROR("No trainlabels available.\n");
00500
00501 CKernel* kernel=ui->ui_kernel->get_kernel();
00502 if (!kernel)
00503 SG_ERROR("No kernel available.\n");
00504
00505 bool success=ui->ui_kernel->init_kernel("TRAIN");
00506 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00507 SG_ERROR("Kernel not initialized.\n");
00508
00509 int32_t num_vec=kernel->get_num_vec_lhs();
00510 if (!oneclass && trainlabels->get_num_labels() != num_vec)
00511 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00512
00513 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00514
00515 if (constraint_generator)
00516 mkl->set_constraint_generator(constraint_generator);
00517 mkl->set_solver_type(solver_type);
00518 mkl->set_bias_enabled(svm_use_bias);
00519 mkl->set_epsilon(svm_epsilon);
00520 mkl->set_max_train_time(max_train_time);
00521 mkl->set_tube_epsilon(svm_tube_epsilon);
00522 mkl->set_nu(svm_nu);
00523 mkl->set_C(svm_C1, svm_C2);
00524 mkl->set_qpsize(svm_qpsize);
00525 mkl->set_shrinking_enabled(svm_use_shrinking);
00526 mkl->set_linadd_enabled(svm_use_linadd);
00527 mkl->set_batch_computation_enabled(svm_use_batch_computation);
00528 mkl->set_mkl_epsilon(svm_weight_epsilon);
00529 mkl->set_mkl_norm(mkl_norm);
00530 mkl->set_elasticnet_lambda(ent_lambda);
00531 mkl->set_mkl_block_norm(mkl_block_norm);
00532 mkl->set_C_mkl(C_mkl);
00533 mkl->set_interleaved_optimization_enabled(mkl_use_interleaved);
00534
00535 if (svm_do_auc_maximization)
00536 {
00537 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00538 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00539 ((CKernelMachine*) mkl)->set_labels(auc_labels);
00540 ((CKernelMachine*) mkl)->set_kernel(auc_kernel);
00541 SG_UNREF(auc_labels);
00542 }
00543 else
00544 {
00545 if(!oneclass)
00546 ((CKernelMachine*) mkl)->set_labels(trainlabels);
00547 ((CKernelMachine*) mkl)->set_kernel(kernel);
00548 }
00549
00550 bool result=mkl->train();
00551
00552 return result;
00553 }
00554
00555 bool CGUIClassifier::train_svm()
00556 {
00557 CSVM* svm= (CSVM*) classifier;
00558 if (!svm)
00559 SG_ERROR("No SVM available.\n");
00560
00561 bool oneclass=(svm->get_classifier_type()==CT_LIBSVMONECLASS);
00562 CLabels* trainlabels=NULL;
00563 if(!oneclass)
00564 trainlabels=ui->ui_labels->get_train_labels();
00565 else
00566 SG_INFO("Training one class svm.\n");
00567 if (!trainlabels && !oneclass)
00568 SG_ERROR("No trainlabels available.\n");
00569
00570 CKernel* kernel=ui->ui_kernel->get_kernel();
00571 if (!kernel)
00572 SG_ERROR("No kernel available.\n");
00573
00574 bool success=ui->ui_kernel->init_kernel("TRAIN");
00575
00576 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00577 SG_ERROR("Kernel not initialized / no train features available.\n");
00578
00579 int32_t num_vec=kernel->get_num_vec_lhs();
00580 if (!oneclass && trainlabels->get_num_labels() != num_vec)
00581 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00582
00583 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00584
00585 svm->set_solver_type(solver_type);
00586 svm->set_bias_enabled(svm_use_bias);
00587 svm->set_epsilon(svm_epsilon);
00588 svm->set_max_train_time(max_train_time);
00589 svm->set_tube_epsilon(svm_tube_epsilon);
00590 svm->set_nu(svm_nu);
00591 svm->set_C(svm_C1, svm_C2);
00592 svm->set_qpsize(svm_qpsize);
00593 svm->set_shrinking_enabled(svm_use_shrinking);
00594 svm->set_linadd_enabled(svm_use_linadd);
00595 svm->set_batch_computation_enabled(svm_use_batch_computation);
00596
00597 if(svm->get_classifier_type()==CT_MKLMULTICLASS)
00598 {
00599 ((CMKLMultiClass *)svm)->set_mkl_epsilon(svm_weight_epsilon );
00600 }
00601
00602 if (svm_do_auc_maximization)
00603 {
00604 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00605 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00606 ((CKernelMachine*) svm)->set_labels(auc_labels);
00607 ((CKernelMachine*) svm)->set_kernel(auc_kernel);
00608 SG_UNREF(auc_labels);
00609 }
00610 else
00611 {
00612 if(!oneclass)
00613 ((CKernelMachine*) svm)->set_labels(trainlabels);
00614 ((CKernelMachine*) svm)->set_kernel(kernel);
00615 }
00616
00617 bool result=svm->train();
00618
00619 return result;
00620 }
00621
00622 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter)
00623 {
00624 bool result=false;
00625 CDistance* distance=ui->ui_distance->get_distance();
00626
00627 if (!distance)
00628 SG_ERROR("No distance available\n");
00629
00630 if (!ui->ui_distance->init_distance("TRAIN"))
00631 SG_ERROR("Initializing distance with train features failed.\n");
00632
00633 ((CDistanceMachine*) classifier)->set_distance(distance);
00634
00635 EClassifierType type=classifier->get_classifier_type();
00636 switch (type)
00637 {
00638 case CT_KMEANS:
00639 {
00640 ((CKMeans*) classifier)->set_k(k);
00641 ((CKMeans*) classifier)->set_max_iter(max_iter);
00642 result=((CKMeans*) classifier)->train();
00643 break;
00644 }
00645 case CT_HIERARCHICAL:
00646 {
00647 ((CHierarchical*) classifier)->set_merges(k);
00648 result=((CHierarchical*) classifier)->train();
00649 break;
00650 }
00651 default:
00652 SG_ERROR("Unknown clustering type %d\n", type);
00653 }
00654
00655 return result;
00656 }
00657
00658 bool CGUIClassifier::train_knn(int32_t k)
00659 {
00660 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00661 CDistance* distance=ui->ui_distance->get_distance();
00662
00663 bool result=false;
00664
00665 if (trainlabels)
00666 {
00667 if (distance)
00668 {
00669 if (!ui->ui_distance->init_distance("TRAIN"))
00670 SG_ERROR("Initializing distance with train features failed.\n");
00671 ((CKNN*) classifier)->set_labels(trainlabels);
00672 ((CKNN*) classifier)->set_distance(distance);
00673 ((CKNN*) classifier)->set_k(k);
00674 result=((CKNN*) classifier)->train();
00675 }
00676 else
00677 SG_ERROR("No distance available.\n");
00678 }
00679 else
00680 SG_ERROR("No labels available\n");
00681
00682 return result;
00683 }
00684
00685 bool CGUIClassifier::train_krr()
00686 {
00687 #ifdef HAVE_LAPACK
00688 CKRR* krr= (CKRR*) classifier;
00689 if (!krr)
00690 SG_ERROR("No SVM available.\n");
00691
00692 CLabels* trainlabels=NULL;
00693 trainlabels=ui->ui_labels->get_train_labels();
00694 if (!trainlabels)
00695 SG_ERROR("No trainlabels available.\n");
00696
00697 CKernel* kernel=ui->ui_kernel->get_kernel();
00698 if (!kernel)
00699 SG_ERROR("No kernel available.\n");
00700
00701 bool success=ui->ui_kernel->init_kernel("TRAIN");
00702
00703 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00704 SG_ERROR("Kernel not initialized / no train features available.\n");
00705
00706 int32_t num_vec=kernel->get_num_vec_lhs();
00707 if (trainlabels->get_num_labels() != num_vec)
00708 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00709
00710
00711
00712 krr->set_labels(trainlabels);
00713 krr->set_kernel(kernel);
00714
00715 bool result=krr->train();
00716 return result;
00717 #else
00718 return false;
00719 #endif
00720 }
00721
00722 bool CGUIClassifier::train_linear(float64_t gamma)
00723 {
00724 ASSERT(classifier);
00725 EClassifierType ctype = classifier->get_classifier_type();
00726 CFeatures* trainfeatures=ui->ui_features->get_train_features();
00727 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00728 bool result=false;
00729
00730 if (!trainfeatures)
00731 SG_ERROR("No trainfeatures available.\n");
00732
00733 if (!trainfeatures->has_property(FP_DOT))
00734 SG_ERROR("Trainfeatures not based on DotFeatures.\n");
00735
00736 if (!trainlabels)
00737 SG_ERROR("No labels available\n");
00738
00739 if (ctype==CT_PERCEPTRON)
00740 {
00741 ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate);
00742 ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter);
00743 }
00744
00745 #ifdef HAVE_LAPACK
00746 if (ctype==CT_LDA)
00747 {
00748 if (trainfeatures->get_feature_type()!=F_DREAL ||
00749 trainfeatures->get_feature_class()!=C_SIMPLE)
00750 SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n");
00751 ((CLDA*) classifier)->set_gamma(gamma);
00752 }
00753 #endif
00754
00755 if (ctype==CT_SVMOCAS)
00756 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00757 #ifdef HAVE_LAPACK
00758 else if (ctype==CT_LIBLINEAR)
00759 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00760 #endif
00761 else if (ctype==CT_SVMLIN)
00762 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00763 else if (ctype==CT_SVMSGD)
00764 ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2);
00765 else if (ctype==CT_SUBGRADIENTSVM)
00766 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00767
00768 else if (ctype==CT_LPM || ctype==CT_LPBOOST)
00769 {
00770 if (trainfeatures->get_feature_class()!=C_SPARSE ||
00771 trainfeatures->get_feature_type()!=F_DREAL)
00772 SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n");
00773 }
00774
00775 ((CLinearMachine*) classifier)->set_labels(trainlabels);
00776 ((CLinearMachine*) classifier)->set_features((CSimpleFeatures<float64_t>*) trainfeatures);
00777 result=((CLinearMachine*) classifier)->train();
00778
00779 return result;
00780 }
00781
00782 bool CGUIClassifier::train_wdocas()
00783 {
00784 CFeatures* trainfeatures=ui->ui_features->get_train_features();
00785 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00786
00787 bool result=false;
00788
00789 if (!trainfeatures)
00790 SG_ERROR("No trainfeatures available.\n");
00791
00792 if (trainfeatures->get_feature_class()!=C_STRING ||
00793 trainfeatures->get_feature_type()!=F_BYTE )
00794 SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n");
00795
00796 if (!trainlabels)
00797 SG_ERROR("No labels available.\n");
00798
00799 ((CWDSVMOcas*) classifier)->set_labels(trainlabels);
00800 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures);
00801 result=((CWDSVMOcas*) classifier)->train();
00802
00803 return result;
00804 }
00805
00806 bool CGUIClassifier::load(char* filename, char* type)
00807 {
00808 bool result=false;
00809
00810 if (new_classifier(type))
00811 {
00812 FILE* model_file=fopen(filename, "r");
00813
00814 if (model_file)
00815 {
00816 if (classifier && classifier->load(model_file))
00817 {
00818 SG_DEBUG("file successfully read.\n");
00819 result=true;
00820 }
00821 else
00822 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename);
00823
00824 fclose(model_file);
00825 }
00826 else
00827 SG_ERROR("Opening file %s failed.\n", filename);
00828
00829 return result;
00830 }
00831 else
00832 SG_ERROR("Type %s of SVM/Classifier unknown.\n", type);
00833
00834 return false;
00835 }
00836
00837 bool CGUIClassifier::save(char* param)
00838 {
00839 bool result=false;
00840 param=SGIO::skip_spaces(param);
00841
00842 if (classifier)
00843 {
00844 FILE* file=fopen(param, "w");
00845
00846 if ((!file) || (!classifier->save(file)))
00847 printf("writing to file %s failed!\n", param);
00848 else
00849 {
00850 printf("successfully written classifier into \"%s\" !\n", param);
00851 result=true;
00852 }
00853
00854 if (file)
00855 fclose(file);
00856 }
00857 else
00858 SG_ERROR("create classifier first\n");
00859
00860 return result;
00861 }
00862
00863 bool CGUIClassifier::set_perceptron_parameters(
00864 float64_t learnrate, int32_t maxiter)
00865 {
00866 if (learnrate<=0)
00867 perceptron_learnrate=0.01;
00868 else
00869 perceptron_learnrate=learnrate;
00870
00871 if (maxiter<=0)
00872 perceptron_maxiter=1000;
00873 else
00874 perceptron_maxiter=maxiter;
00875 SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter);
00876
00877 return true;
00878 }
00879
00880 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon)
00881 {
00882 if (epsilon<0)
00883 svm_epsilon=1e-4;
00884 else
00885 svm_epsilon=epsilon;
00886 SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon);
00887
00888 return true;
00889 }
00890
00891 bool CGUIClassifier::set_max_train_time(float64_t max)
00892 {
00893 if (max>0)
00894 {
00895 max_train_time=max;
00896 SG_INFO("Set to max_train_time=%f.\n", max_train_time);
00897 }
00898 else
00899 SG_INFO("Disabling max_train_time.\n");
00900
00901 return true;
00902 }
00903
00904 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon)
00905 {
00906 if (!classifier)
00907 SG_ERROR("No regression method allocated\n");
00908
00909 if (classifier->get_classifier_type() != CT_LIBSVR &&
00910 classifier->get_classifier_type() != CT_SVRLIGHT &&
00911 classifier->get_classifier_type() != CT_MKLREGRESSION )
00912 {
00913 SG_ERROR("Underlying method not capable of SV-regression\n");
00914 }
00915
00916 if (tube_epsilon<0)
00917 svm_tube_epsilon=1e-2;
00918 svm_tube_epsilon=tube_epsilon;
00919
00920 ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon);
00921 SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon);
00922
00923 return true;
00924 }
00925
00926 bool CGUIClassifier::set_svm_nu(float64_t nu)
00927 {
00928 if (nu<0 || nu>1)
00929 nu=0.5;
00930
00931 svm_nu=nu;
00932 SG_INFO("Set to nu=%f.\n", svm_nu);
00933
00934 return true;
00935 }
00936
00937 bool CGUIClassifier::set_svm_mkl_parameters(
00938 float64_t weight_epsilon, float64_t C, float64_t norm)
00939 {
00940 if (weight_epsilon<0)
00941 weight_epsilon=1e-4;
00942 if (C<0)
00943 C=0;
00944 if (norm<0)
00945 SG_ERROR("MKL norm >= 0\n");
00946
00947 svm_weight_epsilon=weight_epsilon;
00948 C_mkl=C;
00949 mkl_norm=norm;
00950
00951 SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon);
00952 SG_INFO("Set to C_mkl=%f.\n", C_mkl);
00953 SG_INFO("Set to mkl_norm=%f.\n", mkl_norm);
00954
00955 return true;
00956 }
00957
00958 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda)
00959 {
00960 if (lambda<0 || lambda>1)
00961 SG_ERROR("0 <= ent_lambda <= 1\n");
00962
00963 ent_lambda = lambda;
00964 return true;
00965 }
00966
00967 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm)
00968 {
00969 if (mkl_bnorm<1)
00970 SG_ERROR("1 <= mkl_block_norm <= inf\n");
00971
00972 mkl_block_norm=mkl_bnorm;
00973 return true;
00974 }
00975
00976
00977 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2)
00978 {
00979 if (C1<0)
00980 svm_C1=1.0;
00981 else
00982 svm_C1=C1;
00983
00984 if (C2<0)
00985 svm_C2=svm_C1;
00986 else
00987 svm_C2=C2;
00988
00989 SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2);
00990
00991 return true;
00992 }
00993
00994 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize)
00995 {
00996 if (qpsize<2)
00997 svm_qpsize=41;
00998 else
00999 svm_qpsize=qpsize;
01000 SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize);
01001
01002 return true;
01003 }
01004
01005 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize)
01006 {
01007 if (max_qpsize<50)
01008 svm_max_qpsize=50;
01009 else
01010 svm_max_qpsize=max_qpsize;
01011 SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize);
01012
01013 return true;
01014 }
01015
01016 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize)
01017 {
01018 if (svm_bufsize<0)
01019 svm_bufsize=3000;
01020 else
01021 svm_bufsize=bufsize;
01022 SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize);
01023
01024 return true ;
01025 }
01026
01027 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled)
01028 {
01029 svm_use_shrinking=enabled;
01030 if (svm_use_shrinking)
01031 SG_INFO("Enabling shrinking optimization.\n");
01032 else
01033 SG_INFO("Disabling shrinking optimization.\n");
01034
01035 return true;
01036 }
01037
01038 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled)
01039 {
01040 svm_use_batch_computation=enabled;
01041 if (svm_use_batch_computation)
01042 SG_INFO("Enabling batch computation.\n");
01043 else
01044 SG_INFO("Disabling batch computation.\n");
01045
01046 return true;
01047 }
01048
01049 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled)
01050 {
01051 svm_use_linadd=enabled;
01052 if (svm_use_linadd)
01053 SG_INFO("Enabling LINADD optimization.\n");
01054 else
01055 SG_INFO("Disabling LINADD optimization.\n");
01056
01057 return true;
01058 }
01059
01060 bool CGUIClassifier::set_svm_bias_enabled(bool enabled)
01061 {
01062 svm_use_bias=enabled;
01063 if (svm_use_bias)
01064 SG_INFO("Enabling svm bias.\n");
01065 else
01066 SG_INFO("Disabling svm bias.\n");
01067
01068 return true;
01069 }
01070
01071 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled)
01072 {
01073 mkl_use_interleaved=enabled;
01074 if (mkl_use_interleaved)
01075 SG_INFO("Enabling mkl interleaved optimization.\n");
01076 else
01077 SG_INFO("Disabling mkl interleaved optimization.\n");
01078
01079 return true;
01080 }
01081
01082 bool CGUIClassifier::set_do_auc_maximization(bool do_auc)
01083 {
01084 svm_do_auc_maximization=do_auc;
01085
01086 if (svm_do_auc_maximization)
01087 SG_INFO("Enabling AUC maximization.\n");
01088 else
01089 SG_INFO("Disabling AUC maximization.\n");
01090
01091 return true;
01092 }
01093
01094
01095 CLabels* CGUIClassifier::classify()
01096 {
01097 ASSERT(classifier);
01098
01099 switch (classifier->get_classifier_type())
01100 {
01101 case CT_LIGHT:
01102 case CT_LIGHTONECLASS:
01103 case CT_LIBSVM:
01104 case CT_SCATTERSVM:
01105 case CT_MPD:
01106 case CT_GPBT:
01107 case CT_CPLEXSVM:
01108 case CT_GMNPSVM:
01109 case CT_GNPPSVM:
01110 case CT_LIBSVR:
01111 case CT_LIBSVMMULTICLASS:
01112 case CT_LIBSVMONECLASS:
01113 case CT_SVRLIGHT:
01114 case CT_MKLCLASSIFICATION:
01115 case CT_MKLMULTICLASS:
01116 case CT_MKLREGRESSION:
01117 case CT_MKLONECLASS:
01118 case CT_KRR:
01119 return classify_kernelmachine();
01120 case CT_KNN:
01121 return classify_distancemachine();
01122 case CT_PERCEPTRON:
01123 case CT_LDA:
01124 return classify_linear();
01125 case CT_SVMLIN:
01126 case CT_SVMPERF:
01127 case CT_SUBGRADIENTSVM:
01128 case CT_SVMOCAS:
01129 case CT_SVMSGD:
01130 case CT_LPM:
01131 case CT_LPBOOST:
01132 case CT_SUBGRADIENTLPM:
01133 case CT_LIBLINEAR:
01134 return classify_linear();
01135 case CT_WDSVMOCAS:
01136 return classify_byte_linear();
01137 default:
01138 SG_ERROR("unknown classifier type\n");
01139 break;
01140 };
01141
01142 return false;
01143 }
01144
01145 CLabels* CGUIClassifier::classify_kernelmachine()
01146 {
01147 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01148 CFeatures* testfeatures=ui->ui_features->get_test_features();
01149
01150 if (!classifier)
01151 SG_ERROR("No kernelmachine available.\n");
01152
01153 bool success=true;
01154
01155 if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
01156 {
01157 if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED
01158 && ( !trainfeatures || !testfeatures ))
01159 {
01160 SG_DEBUG("skipping initialisation of combined kernel "
01161 "as train/test features are unavailable\n");
01162 }
01163 else
01164 {
01165 if (!trainfeatures)
01166 SG_ERROR("No training features available.\n");
01167 if (!testfeatures)
01168 SG_ERROR("No test features available.\n");
01169
01170 success=ui->ui_kernel->init_kernel("TEST");
01171 }
01172 }
01173
01174 if (!success || !ui->ui_kernel->is_initialized())
01175 SG_ERROR("Kernel not initialized.\n");
01176
01177 CKernelMachine* km=(CKernelMachine*) classifier;
01178 km->set_kernel(ui->ui_kernel->get_kernel());
01179 km->set_batch_computation_enabled(svm_use_batch_computation);
01180
01181 SG_INFO("Starting kernel machine testing.\n");
01182 return classifier->apply();
01183 }
01184
01185 bool CGUIClassifier::get_trained_classifier(
01186 float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias,
01187 int32_t& brows, int32_t& bcols,
01188 int32_t idx)
01189 {
01190 ASSERT(classifier);
01191
01192 switch (classifier->get_classifier_type())
01193 {
01194 case CT_SCATTERSVM:
01195 case CT_GNPPSVM:
01196 case CT_LIBSVMMULTICLASS:
01197 case CT_LIGHT:
01198 case CT_LIGHTONECLASS:
01199 case CT_LIBSVM:
01200 case CT_MPD:
01201 case CT_GPBT:
01202 case CT_CPLEXSVM:
01203 case CT_GMNPSVM:
01204 case CT_LIBSVR:
01205 case CT_LIBSVMONECLASS:
01206 case CT_SVRLIGHT:
01207 case CT_MKLCLASSIFICATION:
01208 case CT_MKLREGRESSION:
01209 case CT_MKLONECLASS:
01210 case CT_MKLMULTICLASS:
01211 case CT_KRR:
01212 return get_svm(weights, rows, cols, bias, brows, bcols, idx);
01213 break;
01214 case CT_PERCEPTRON:
01215 case CT_LDA:
01216 case CT_LPM:
01217 case CT_LPBOOST:
01218 case CT_SUBGRADIENTLPM:
01219 case CT_SVMOCAS:
01220 case CT_SVMSGD:
01221 case CT_SVMLIN:
01222 case CT_SVMPERF:
01223 case CT_SUBGRADIENTSVM:
01224 case CT_LIBLINEAR:
01225 return get_linear(weights, rows, cols, bias, brows, bcols);
01226 break;
01227 case CT_KMEANS:
01228 case CT_HIERARCHICAL:
01229 return get_clustering(weights, rows, cols, bias, brows, bcols);
01230 break;
01231 case CT_KNN:
01232 SG_ERROR("not implemented");
01233 break;
01234 default:
01235 SG_ERROR("unknown classifier type\n");
01236 break;
01237 };
01238 return false;
01239 }
01240
01241
01242 int32_t CGUIClassifier::get_num_svms()
01243 {
01244 ASSERT(classifier);
01245 return ((CMultiClassSVM*) classifier)->get_num_svms();
01246 }
01247
01248 bool CGUIClassifier::get_svm(
01249 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01250 int32_t& brows, int32_t& bcols, int32_t idx)
01251 {
01252 CSVM* svm=(CSVM*) classifier;
01253
01254 if (idx>-1)
01255 svm=((CMultiClassSVM*) svm)->get_svm(idx);
01256
01257 if (svm)
01258 {
01259 brows=1;
01260 bcols=1;
01261 bias=SG_MALLOC(float64_t, 1);
01262 *bias=svm->get_bias();
01263
01264 rows=svm->get_num_support_vectors();
01265 cols=2;
01266 weights=SG_MALLOC(float64_t, rows*cols);
01267
01268 for (int32_t i=0; i<rows; i++)
01269 {
01270 weights[i]=svm->get_alpha(i);
01271 weights[i+rows]=svm->get_support_vector(i);
01272 }
01273
01274 return true;
01275 }
01276
01277 return false;
01278 }
01279
01280 bool CGUIClassifier::get_clustering(
01281 float64_t* ¢ers, int32_t& rows, int32_t& cols, float64_t*& radi,
01282 int32_t& brows, int32_t& bcols)
01283 {
01284 if (!classifier)
01285 return false;
01286
01287 switch (classifier->get_classifier_type())
01288 {
01289 case CT_KMEANS:
01290 {
01291 CKMeans* clustering=(CKMeans*) classifier;
01292
01293 bcols=1;
01294 SGVector<float64_t> r=clustering->get_radiuses();
01295 brows=r.vlen;
01296 radi=SG_MALLOC(float64_t, brows);
01297 memcpy(radi, r.vector, sizeof(float64_t)*brows);
01298
01299 cols=1;
01300 SGMatrix<float64_t> c=clustering->get_cluster_centers();
01301 rows=c.num_rows;
01302 cols=c.num_cols;
01303 centers=SG_MALLOC(float64_t, rows*cols);
01304 memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols);
01305 break;
01306 }
01307
01308 case CT_HIERARCHICAL:
01309 {
01310 CHierarchical* clustering=(CHierarchical*) classifier;
01311
01312
01313 bcols=1;
01314 SGVector<float64_t> r=clustering->get_merge_distances();
01315 brows=r.vlen;
01316 radi=SG_MALLOC(float64_t, brows);
01317 memcpy(radi, r.vector, sizeof(float64_t)*brows);
01318
01319 SGMatrix<int32_t> p=clustering->get_cluster_pairs();
01320 rows=p.num_rows;
01321 cols=p.num_cols;
01322 centers=SG_MALLOC(float64_t, rows*cols);
01323 for (int32_t i=0; i<rows*cols; i++)
01324 centers[i]=(float64_t) p.matrix[i];
01325
01326 break;
01327 }
01328
01329 default:
01330 SG_ERROR("internal error - unknown clustering type\n");
01331 }
01332
01333 return true;
01334 }
01335
01336 bool CGUIClassifier::get_linear(
01337 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01338 int32_t& brows, int32_t& bcols)
01339 {
01340 CLinearMachine* linear=(CLinearMachine*) classifier;
01341
01342 if (!linear)
01343 return false;
01344
01345 bias=SG_MALLOC(float64_t, 1);
01346 *bias=linear->get_bias();
01347 brows=1;
01348 bcols=1;
01349
01350 cols=1;
01351 float64_t* w=NULL;
01352 linear->get_w(w, rows);
01353
01354 weights= SG_MALLOC(float64_t, rows);
01355 memcpy(weights, w, sizeof(float64_t)*rows);
01356
01357 return true;
01358 }
01359
01360 CLabels* CGUIClassifier::classify_distancemachine()
01361 {
01362 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01363 CFeatures* testfeatures=ui->ui_features->get_test_features();
01364
01365 if (!classifier)
01366 {
01367 SG_ERROR("no kernelmachine available\n") ;
01368 return NULL;
01369 }
01370 if (!trainfeatures)
01371 {
01372 SG_ERROR("no training features available\n") ;
01373 return NULL;
01374 }
01375
01376 if (!testfeatures)
01377 {
01378 SG_ERROR("no test features available\n") ;
01379 return NULL;
01380 }
01381
01382 bool success=ui->ui_distance->init_distance("TEST");
01383
01384 if (!success || !ui->ui_distance->is_initialized())
01385 {
01386 SG_ERROR("distance not initialized\n") ;
01387 return NULL;
01388 }
01389
01390 ((CDistanceMachine*) classifier)->set_distance(
01391 ui->ui_distance->get_distance());
01392 SG_INFO("starting distance machine testing\n") ;
01393 return classifier->apply();
01394 }
01395
01396
01397 CLabels* CGUIClassifier::classify_linear()
01398 {
01399 CFeatures* testfeatures=ui->ui_features->get_test_features();
01400
01401 if (!classifier)
01402 {
01403 SG_ERROR("no classifier available\n") ;
01404 return NULL;
01405 }
01406 if (!testfeatures)
01407 {
01408 SG_ERROR("no test features available\n") ;
01409 return NULL;
01410 }
01411 if (!(testfeatures->has_property(FP_DOT)))
01412 {
01413 SG_ERROR("testfeatures not based on DotFeatures\n") ;
01414 return false ;
01415 }
01416
01417 ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures);
01418 SG_INFO("starting linear classifier testing\n") ;
01419 return classifier->apply();
01420 }
01421
01422 CLabels* CGUIClassifier::classify_byte_linear()
01423 {
01424 CFeatures* testfeatures=ui->ui_features->get_test_features();
01425
01426 if (!classifier)
01427 {
01428 SG_ERROR("no svm available\n") ;
01429 return NULL;
01430 }
01431 if (!testfeatures)
01432 {
01433 SG_ERROR("no test features available\n") ;
01434 return NULL;
01435 }
01436 if (testfeatures->get_feature_class() != C_STRING ||
01437 testfeatures->get_feature_type() != F_BYTE )
01438 {
01439 SG_ERROR("testfeatures not of class STRING type BYTE\n") ;
01440 return false ;
01441 }
01442
01443 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures);
01444 SG_INFO("starting linear classifier testing\n") ;
01445 return classifier->apply();
01446 }
01447
01448 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result)
01449 {
01450 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01451 CFeatures* testfeatures=ui->ui_features->get_test_features();
01452
01453 if (!classifier)
01454 {
01455 SG_ERROR("no svm available\n") ;
01456 return false;
01457 }
01458
01459 if (!ui->ui_kernel->is_initialized())
01460 {
01461 SG_ERROR("kernel not initialized\n") ;
01462 return false;
01463 }
01464
01465 if (!ui->ui_kernel->get_kernel() ||
01466 !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
01467 {
01468 if (!trainfeatures)
01469 {
01470 SG_ERROR("no training features available\n") ;
01471 return false;
01472 }
01473
01474 if (!testfeatures)
01475 {
01476 SG_ERROR("no test features available\n") ;
01477 return false;
01478 }
01479 }
01480
01481 ((CKernelMachine*) classifier)->set_kernel(
01482 ui->ui_kernel->get_kernel());
01483
01484 result=classifier->apply(idx);
01485 return true ;
01486 }
01487
01488
01489 bool CGUIClassifier::set_krr_tau(float64_t tau)
01490 {
01491 #ifdef HAVE_LAPACK
01492 krr_tau=tau;
01493 ((CKRR*) classifier)->set_tau(krr_tau);
01494 SG_INFO("Set to krr_tau=%f.\n", krr_tau);
01495
01496 return true;
01497 #else
01498 return false;
01499 #endif
01500 }
01501
01502 bool CGUIClassifier::set_solver(char* solver)
01503 {
01504 ESolverType s=ST_AUTO;
01505
01506 if (strncmp(solver,"NEWTON", 6)==0)
01507 {
01508 SG_INFO("Using NEWTON solver.\n");
01509 s=ST_NEWTON;
01510 }
01511 else if (strncmp(solver,"DIRECT", 6)==0)
01512 {
01513 SG_INFO("Using DIRECT solver\n");
01514 s=ST_DIRECT;
01515 }
01516 else if (strncmp(solver,"BLOCK_NORM", 9)==0)
01517 {
01518 SG_INFO("Using BLOCK_NORM solver\n");
01519 s=ST_BLOCK_NORM;
01520 }
01521 else if (strncmp(solver,"ELASTICNET", 10)==0)
01522 {
01523 SG_INFO("Using ELASTICNET solver\n");
01524 s=ST_ELASTICNET;
01525 }
01526 else if (strncmp(solver,"AUTO", 4)==0)
01527 {
01528 SG_INFO("Automagically determining solver.\n");
01529 s=ST_AUTO;
01530 }
01531 #ifdef USE_CPLEX
01532 else if (strncmp(solver, "CPLEX", 5)==0)
01533 {
01534 SG_INFO("USING CPLEX METHOD selected\n");
01535 s=ST_CPLEX;
01536 }
01537 #endif
01538 #ifdef USE_GLPK
01539 else if (strncmp(solver,"GLPK", 4)==0)
01540 {
01541 SG_INFO("Using GLPK solver\n");
01542 s=ST_GLPK;
01543 }
01544 #endif
01545 else
01546 SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver);
01547
01548
01549 solver_type=s;
01550 return true;
01551 }
01552
01553 bool CGUIClassifier::set_constraint_generator(char* name)
01554 {
01555 if (strcmp(name,"LIBSVM_ONECLASS")==0)
01556 {
01557 SG_UNREF(constraint_generator);
01558 constraint_generator = new CLibSVMOneClass();
01559 SG_INFO("created SVMlibsvm object for oneclass\n");
01560 }
01561 else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
01562 {
01563 SG_UNREF(constraint_generator);
01564 constraint_generator = new CLibSVMMultiClass();
01565 SG_INFO("created SVMlibsvm object for multiclass\n");
01566 }
01567 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
01568 {
01569 SG_UNREF(constraint_generator);
01570 constraint_generator= new CLibSVMMultiClass(LIBSVM_NU_SVC);
01571 SG_INFO("created SVMlibsvm object for multiclass\n") ;
01572 }
01573 else if (strcmp(name,"SCATTERSVM_RULE1")==0)
01574 {
01575 SG_UNREF(constraint_generator);
01576 constraint_generator= new CScatterSVM(TEST_RULE1);
01577 SG_INFO("created ScatterSVM RULE1 object\n") ;
01578 }
01579 else if (strcmp(name,"SCATTERSVM_RULE2")==0)
01580 {
01581 SG_UNREF(constraint_generator);
01582 constraint_generator= new CScatterSVM(TEST_RULE2);
01583 SG_INFO("created ScatterSVM RULE2 object\n") ;
01584 }
01585 else if (strcmp(name,"LIBSVM_NU")==0)
01586 {
01587 SG_UNREF(constraint_generator);
01588 constraint_generator= new CLibSVM(LIBSVM_NU_SVC);
01589 SG_INFO("created SVMlibsvm object\n") ;
01590 }
01591 else if (strcmp(name,"LIBSVM")==0)
01592 {
01593 SG_UNREF(constraint_generator);
01594 constraint_generator= new CLibSVM();
01595 SG_INFO("created SVMlibsvm object\n") ;
01596 }
01597 else if (strcmp(name,"LARANK")==0)
01598 {
01599 SG_UNREF(constraint_generator);
01600 constraint_generator= new CLaRank();
01601 SG_INFO("created LaRank object\n") ;
01602 }
01603 #ifdef USE_SVMLIGHT
01604 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
01605 {
01606 SG_UNREF(constraint_generator);
01607 constraint_generator= new CSVMLight();
01608 SG_INFO("created SVMLight object\n") ;
01609 }
01610 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
01611 {
01612 SG_UNREF(constraint_generator);
01613 constraint_generator= new CSVMLightOneClass();
01614 SG_INFO("created SVMLightOneClass object\n") ;
01615 }
01616 else if (strcmp(name,"SVRLIGHT")==0)
01617 {
01618 SG_UNREF(constraint_generator);
01619 constraint_generator= new CSVRLight();
01620 SG_INFO("created SVRLight object\n") ;
01621 }
01622 #endif //USE_SVMLIGHT
01623 else if (strcmp(name,"GPBTSVM")==0)
01624 {
01625 SG_UNREF(constraint_generator);
01626 constraint_generator= new CGPBTSVM();
01627 SG_INFO("created GPBT-SVM object\n") ;
01628 }
01629 else if (strcmp(name,"MPDSVM")==0)
01630 {
01631 SG_UNREF(constraint_generator);
01632 constraint_generator= new CMPDSVM();
01633 SG_INFO("created MPD-SVM object\n") ;
01634 }
01635 else if (strcmp(name,"GNPPSVM")==0)
01636 {
01637 SG_UNREF(constraint_generator);
01638 constraint_generator= new CGNPPSVM();
01639 SG_INFO("created GNPP-SVM object\n") ;
01640 }
01641 else if (strcmp(name,"GMNPSVM")==0)
01642 {
01643 SG_UNREF(constraint_generator);
01644 constraint_generator= new CGMNPSVM();
01645 SG_INFO("created GMNP-SVM object\n") ;
01646 }
01647 else if (strcmp(name,"LIBSVR")==0)
01648 {
01649 SG_UNREF(constraint_generator);
01650 constraint_generator= new CLibSVR();
01651 SG_INFO("created SVRlibsvm object\n") ;
01652 }
01653 else
01654 {
01655 SG_ERROR("Unknown SV-classifier %s.\n", name);
01656 return false;
01657 }
01658 SG_REF(constraint_generator);
01659
01660 return (constraint_generator!=NULL);
01661 }