00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <shogun/ui/GUIClassifier.h>
00012 #include <shogun/ui/SGInterface.h>
00013
00014 #include <shogun/lib/config.h>
00015 #include <shogun/io/SGIO.h>
00016
00017 #include <shogun/features/SparseFeatures.h>
00018 #include <shogun/features/RealFileFeatures.h>
00019 #include <shogun/labels/Labels.h>
00020
00021 #include <shogun/kernel/AUCKernel.h>
00022
00023 #include <shogun/multiclass/KNN.h>
00024 #include <shogun/clustering/KMeans.h>
00025 #include <shogun/clustering/Hierarchical.h>
00026 #include <shogun/classifier/PluginEstimate.h>
00027
00028 #include <shogun/classifier/LDA.h>
00029 #include <shogun/classifier/LPM.h>
00030 #include <shogun/classifier/LPBoost.h>
00031 #include <shogun/classifier/Perceptron.h>
00032
00033 #include <shogun/machine/LinearMachine.h>
00034
00035 #ifdef USE_SVMLIGHT
00036 #include <shogun/classifier/svm/SVMLight.h>
00037 #include <shogun/classifier/svm/SVMLightOneClass.h>
00038 #include <shogun/regression/svr/SVRLight.h>
00039 #endif //USE_SVMLIGHT
00040
00041 #include <shogun/classifier/mkl/MKLClassification.h>
00042 #include <shogun/regression/svr/MKLRegression.h>
00043 #include <shogun/classifier/mkl/MKLOneClass.h>
00044 #include <shogun/classifier/mkl/MKLMulticlass.h>
00045 #include <shogun/classifier/svm/LibSVM.h>
00046 #include <shogun/multiclass/LaRank.h>
00047 #include <shogun/classifier/svm/GPBTSVM.h>
00048 #include <shogun/classifier/svm/LibSVMOneClass.h>
00049 #include <shogun/multiclass/MulticlassLibSVM.h>
00050
00051 #include <shogun/regression/svr/LibSVR.h>
00052 #include <shogun/regression/KernelRidgeRegression.h>
00053
00054 #include <shogun/classifier/svm/LibLinear.h>
00055 #include <shogun/classifier/svm/MPDSVM.h>
00056 #include <shogun/classifier/svm/GNPPSVM.h>
00057 #include <shogun/multiclass/GMNPSVM.h>
00058 #include <shogun/multiclass/ScatterSVM.h>
00059
00060 #include <shogun/classifier/svm/SVMLin.h>
00061 #include <shogun/classifier/svm/SubGradientSVM.h>
00062 #include <shogun/classifier/SubGradientLPM.h>
00063 #include <shogun/classifier/svm/SVMOcas.h>
00064 #include <shogun/classifier/svm/SVMSGD.h>
00065 #include <shogun/classifier/svm/WDSVMOcas.h>
00066
00067 #include <shogun/io/SerializableAsciiFile.h>
00068
00069 using namespace shogun;
00070
00071 CGUIClassifier::CGUIClassifier(CSGInterface* ui_)
00072 : CSGObject(), ui(ui_)
00073 {
00074 constraint_generator=NULL;
00075 classifier=NULL;
00076 max_train_time=0;
00077
00078
00079 perceptron_learnrate=0.1;
00080 perceptron_maxiter=1000;
00081
00082
00083 svm_qpsize=41;
00084 svm_bufsize=3000;
00085 svm_max_qpsize=1000;
00086 mkl_norm=1;
00087 ent_lambda=0;
00088 mkl_block_norm=4;
00089 svm_C1=1;
00090 svm_C2=1;
00091 C_mkl=0;
00092 mkl_use_interleaved=true;
00093 svm_weight_epsilon=1e-5;
00094 svm_epsilon=1e-5;
00095 svm_tube_epsilon=1e-2;
00096 svm_nu=0.5;
00097 svm_use_shrinking = true ;
00098
00099 svm_use_bias = true;
00100 svm_use_batch_computation = true ;
00101 svm_use_linadd = true ;
00102 svm_do_auc_maximization = false ;
00103
00104
00105 krr_tau=1;
00106
00107 solver_type=ST_AUTO;
00108 }
00109
00110 CGUIClassifier::~CGUIClassifier()
00111 {
00112 SG_UNREF(classifier);
00113 SG_UNREF(constraint_generator);
00114 }
00115
00116 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d)
00117 {
00118 if (strcmp(name,"LIBSVM_ONECLASS")==0)
00119 {
00120 SG_UNREF(classifier);
00121 classifier = new CLibSVMOneClass();
00122 SG_INFO("created SVMlibsvm object for oneclass\n");
00123 }
00124 else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
00125 {
00126 SG_UNREF(classifier);
00127 classifier = new CMulticlassLibSVM();
00128 SG_INFO("created SVMlibsvm object for multiclass\n");
00129 }
00130 else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
00131 {
00132 SG_UNREF(classifier);
00133 classifier= new CMulticlassLibSVM(LIBSVM_NU_SVC);
00134 SG_INFO("created SVMlibsvm object for multiclass\n") ;
00135 }
00136 #ifdef USE_SVMLIGHT
00137 else if (strcmp(name,"SCATTERSVM_NO_BIAS_SVMLIGHT")==0)
00138 {
00139 SG_UNREF(classifier);
00140 classifier= new CScatterSVM(NO_BIAS_SVMLIGHT);
00141 SG_INFO("created ScatterSVM NO BIAS SVMLIGHT object\n") ;
00142 }
00143 #endif //USE_SVMLIGHT
00144 else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0)
00145 {
00146 SG_UNREF(classifier);
00147 classifier= new CScatterSVM(NO_BIAS_LIBSVM);
00148 SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ;
00149 }
00150 else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0)
00151 {
00152 SG_UNREF(classifier);
00153 classifier= new CScatterSVM(TEST_RULE1);
00154 SG_INFO("created ScatterSVM TESTRULE1 object\n") ;
00155 }
00156 else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0)
00157 {
00158 SG_UNREF(classifier);
00159 classifier= new CScatterSVM(TEST_RULE2);
00160 SG_INFO("created ScatterSVM TESTRULE2 object\n") ;
00161 }
00162 else if (strcmp(name,"LIBSVM_NU")==0)
00163 {
00164 SG_UNREF(classifier);
00165 classifier= new CLibSVM(LIBSVM_NU_SVC);
00166 SG_INFO("created SVMlibsvm object\n") ;
00167 }
00168 else if (strcmp(name,"LIBSVM")==0)
00169 {
00170 SG_UNREF(classifier);
00171 classifier= new CLibSVM();
00172 SG_INFO("created SVMlibsvm object\n") ;
00173 }
00174 else if (strcmp(name,"LARANK")==0)
00175 {
00176 SG_UNREF(classifier);
00177 classifier= new CLaRank();
00178 SG_INFO("created LaRank object\n") ;
00179 }
00180 #ifdef USE_SVMLIGHT
00181 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
00182 {
00183 SG_UNREF(classifier);
00184 classifier= new CSVMLight();
00185 SG_INFO("created SVMLight object\n") ;
00186 }
00187 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
00188 {
00189 SG_UNREF(classifier);
00190 classifier= new CSVMLightOneClass();
00191 SG_INFO("created SVMLightOneClass object\n") ;
00192 }
00193 else if (strcmp(name,"SVRLIGHT")==0)
00194 {
00195 SG_UNREF(classifier);
00196 classifier= new CSVRLight();
00197 SG_INFO("created SVRLight object\n") ;
00198 }
00199 #endif //USE_SVMLIGHT
00200 else if (strcmp(name,"GPBTSVM")==0)
00201 {
00202 SG_UNREF(classifier);
00203 classifier= new CGPBTSVM();
00204 SG_INFO("created GPBT-SVM object\n") ;
00205 }
00206 else if (strcmp(name,"MPDSVM")==0)
00207 {
00208 SG_UNREF(classifier);
00209 classifier= new CMPDSVM();
00210 SG_INFO("created MPD-SVM object\n") ;
00211 }
00212 else if (strcmp(name,"GNPPSVM")==0)
00213 {
00214 SG_UNREF(classifier);
00215 classifier= new CGNPPSVM();
00216 SG_INFO("created GNPP-SVM object\n") ;
00217 }
00218 else if (strcmp(name,"GMNPSVM")==0)
00219 {
00220 SG_UNREF(classifier);
00221 classifier= new CGMNPSVM();
00222 SG_INFO("created GMNP-SVM object\n") ;
00223 }
00224 else if (strcmp(name,"LIBSVR")==0)
00225 {
00226 SG_UNREF(classifier);
00227 classifier= new CLibSVR();
00228 SG_INFO("created SVRlibsvm object\n") ;
00229 }
00230 #ifdef HAVE_LAPACK
00231 else if (strcmp(name, "KERNELRIDGEREGRESSION")==0)
00232 {
00233 SG_UNREF(classifier);
00234 classifier=new CKernelRidgeRegression(krr_tau, ui->ui_kernel->get_kernel(),
00235 ui->ui_labels->get_train_labels());
00236 SG_INFO("created KernelRidgeRegression object %p\n", classifier);
00237 }
00238 #endif //HAVE_LAPACK
00239 else if (strcmp(name,"PERCEPTRON")==0)
00240 {
00241 SG_UNREF(classifier);
00242 classifier= new CPerceptron();
00243 SG_INFO("created Perceptron object\n") ;
00244 }
00245 #ifdef HAVE_LAPACK
00246 else if (strncmp(name,"LIBLINEAR",9)==0)
00247 {
00248 LIBLINEAR_SOLVER_TYPE st=L2R_LR;
00249
00250 if (strcmp(name,"LIBLINEAR_L2R_LR")==0)
00251 {
00252 st=L2R_LR;
00253 SG_INFO("created LibLinear l2 regularized logistic regression object\n") ;
00254 }
00255 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0)
00256 {
00257 st=L2R_L2LOSS_SVC_DUAL;
00258 SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ;
00259 }
00260 else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0)
00261 {
00262 st=L2R_L2LOSS_SVC;
00263 SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ;
00264 }
00265 else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0)
00266 {
00267 st=L1R_L2LOSS_SVC;
00268 SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ;
00269 }
00270 else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0)
00271 {
00272 st=L2R_L1LOSS_SVC_DUAL;
00273 SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ;
00274 }
00275 else
00276 SG_ERROR("unknown liblinear type\n");
00277
00278 SG_UNREF(classifier);
00279 classifier= new CLibLinear(st);
00280 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00281 ((CLibLinear*) classifier)->set_epsilon(svm_epsilon);
00282 ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias);
00283 }
00284 else if (strcmp(name,"LDA")==0)
00285 {
00286 SG_UNREF(classifier);
00287 classifier= new CLDA();
00288 SG_INFO("created LDA object\n") ;
00289 }
00290 #endif //HAVE_LAPACK
00291 #ifdef USE_CPLEX
00292 else if (strcmp(name,"LPM")==0)
00293 {
00294 SG_UNREF(classifier);
00295 classifier= new CLPM();
00296 ((CLPM*) classifier)->set_C(svm_C1, svm_C2);
00297 ((CLPM*) classifier)->set_epsilon(svm_epsilon);
00298 ((CLPM*) classifier)->set_bias_enabled(svm_use_bias);
00299 ((CLPM*) classifier)->set_max_train_time(max_train_time);
00300 SG_INFO("created LPM object\n") ;
00301 }
00302 else if (strcmp(name,"LPBOOST")==0)
00303 {
00304 SG_UNREF(classifier);
00305 classifier= new CLPBoost();
00306 ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2);
00307 ((CLPBoost*) classifier)->set_epsilon(svm_epsilon);
00308 ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias);
00309 ((CLPBoost*) classifier)->set_max_train_time(max_train_time);
00310 SG_INFO("created LPBoost object\n") ;
00311 }
00312 else if (strcmp(name,"SUBGRADIENTLPM")==0)
00313 {
00314 SG_UNREF(classifier);
00315 classifier= new CSubGradientLPM();
00316
00317 ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias);
00318 ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize);
00319 ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize);
00320 ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2);
00321 ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon);
00322 ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time);
00323 SG_INFO("created Subgradient LPM object\n") ;
00324 }
00325 #endif //USE_CPLEX
00326 else if (strncmp(name,"KNN", strlen("KNN"))==0)
00327 {
00328 SG_UNREF(classifier);
00329 classifier= new CKNN();
00330 SG_INFO("created KNN object\n") ;
00331 }
00332 else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0)
00333 {
00334 SG_UNREF(classifier);
00335 classifier= new CKMeans();
00336 SG_INFO("created KMeans object\n") ;
00337 }
00338 else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0)
00339 {
00340 SG_UNREF(classifier);
00341 classifier= new CHierarchical();
00342 SG_INFO("created Hierarchical clustering object\n") ;
00343 }
00344 else if (strcmp(name,"SVMLIN")==0)
00345 {
00346 SG_UNREF(classifier);
00347 classifier= new CSVMLin();
00348 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00349 ((CSVMLin*) classifier)->set_epsilon(svm_epsilon);
00350 ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias);
00351 SG_INFO("created SVMLin object\n") ;
00352 }
00353 else if (strcmp(name,"SUBGRADIENTSVM")==0)
00354 {
00355 SG_UNREF(classifier);
00356 classifier= new CSubGradientSVM();
00357
00358 ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias);
00359 ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize);
00360 ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize);
00361 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00362 ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon);
00363 ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time);
00364 SG_INFO("created Subgradient SVM object\n") ;
00365 }
00366 else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0)
00367 {
00368 SG_UNREF(classifier);
00369 classifier= new CWDSVMOcas(SVM_OCAS);
00370
00371 ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00372 ((CWDSVMOcas*) classifier)->set_degree(d, from_d);
00373 ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00374 ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00375 ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00376 SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ;
00377 }
00378 else if (strcmp(name,"SVMOCAS")==0)
00379 {
00380 SG_UNREF(classifier);
00381 classifier= new CSVMOcas(SVM_OCAS);
00382
00383 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00384 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00385 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00386 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00387 SG_INFO("created SVM Ocas(OCAS) object\n") ;
00388 }
00389 else if (strcmp(name,"SVMSGD")==0)
00390 {
00391 SG_UNREF(classifier);
00392 classifier= new CSVMSGD(svm_C1);
00393 ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias);
00394 SG_INFO("created SVM SGD object\n") ;
00395 }
00396 else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0))
00397 {
00398 SG_UNREF(classifier);
00399 classifier= new CSVMOcas(SVM_BMRM);
00400
00401 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00402 ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00403 ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00404 ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00405 SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ;
00406 }
00407 else if (strcmp(name,"MKL_CLASSIFICATION")==0)
00408 {
00409 SG_UNREF(classifier);
00410 classifier= new CMKLClassification();
00411 }
00412 else if (strcmp(name,"MKL_ONECLASS")==0)
00413 {
00414 SG_UNREF(classifier);
00415 classifier= new CMKLOneClass();
00416 }
00417 else if (strcmp(name,"MKL_MULTICLASS")==0)
00418 {
00419 SG_UNREF(classifier);
00420 classifier= new CMKLMulticlass();
00421 }
00422 else if (strcmp(name,"MKL_REGRESSION")==0)
00423 {
00424 SG_UNREF(classifier);
00425 classifier= new CMKLRegression();
00426 }
00427 else
00428 {
00429 SG_ERROR("Unknown classifier %s.\n", name);
00430 return false;
00431 }
00432 SG_REF(classifier);
00433
00434 return (classifier!=NULL);
00435 }
00436
00437 bool CGUIClassifier::train_mkl_multiclass()
00438 {
00439 CMKLMulticlass* mkl= (CMKLMulticlass*) classifier;
00440 if (!mkl)
00441 SG_ERROR("No MKL available.\n");
00442
00443 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00444 if (!trainlabels)
00445 SG_ERROR("No trainlabels available.\n");
00446
00447 CKernel* kernel=ui->ui_kernel->get_kernel();
00448 if (!kernel)
00449 SG_ERROR("No kernel available.\n");
00450
00451 bool success=ui->ui_kernel->init_kernel("TRAIN");
00452
00453 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00454 SG_ERROR("Kernel not initialized / no train features available.\n");
00455
00456 int32_t num_vec=kernel->get_num_vec_lhs();
00457 if (trainlabels->get_num_labels() != num_vec)
00458 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00459
00460 SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00461
00462 mkl->set_mkl_epsilon(svm_weight_epsilon);
00463 mkl->set_mkl_norm(mkl_norm);
00464
00465 mkl->set_solver_type(solver_type);
00466 mkl->set_bias_enabled(svm_use_bias);
00467 mkl->set_epsilon(svm_epsilon);
00468 mkl->set_max_train_time(max_train_time);
00469 mkl->set_tube_epsilon(svm_tube_epsilon);
00470 mkl->set_nu(svm_nu);
00471 mkl->set_C(svm_C1);
00472 mkl->set_qpsize(svm_qpsize);
00473 mkl->set_shrinking_enabled(svm_use_shrinking);
00474 mkl->set_linadd_enabled(svm_use_linadd);
00475 mkl->set_batch_computation_enabled(svm_use_batch_computation);
00476
00477 ((CKernelMulticlassMachine*) mkl)->set_labels(trainlabels);
00478 ((CKernelMulticlassMachine*) mkl)->set_kernel(kernel);
00479
00480 return mkl->train();
00481 }
00482
00483 bool CGUIClassifier::train_mkl()
00484 {
00485 CMKL* mkl= (CMKL*) classifier;
00486 if (!mkl)
00487 SG_ERROR("No SVM available.\n");
00488
00489 bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS);
00490 CLabels* trainlabels=NULL;
00491 if(!oneclass)
00492 trainlabels=ui->ui_labels->get_train_labels();
00493 else
00494 SG_INFO("Training one class mkl.\n");
00495 if (!trainlabels && !oneclass)
00496 SG_ERROR("No trainlabels available.\n");
00497
00498 CKernel* kernel=ui->ui_kernel->get_kernel();
00499 if (!kernel)
00500 SG_ERROR("No kernel available.\n");
00501
00502 bool success=ui->ui_kernel->init_kernel("TRAIN");
00503 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00504 SG_ERROR("Kernel not initialized.\n");
00505
00506 int32_t num_vec=kernel->get_num_vec_lhs();
00507 if (!oneclass && trainlabels->get_num_labels() != num_vec)
00508 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00509
00510 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00511
00512 if (constraint_generator)
00513 mkl->set_constraint_generator(constraint_generator);
00514 mkl->set_solver_type(solver_type);
00515 mkl->set_bias_enabled(svm_use_bias);
00516 mkl->set_epsilon(svm_epsilon);
00517 mkl->set_max_train_time(max_train_time);
00518 mkl->set_tube_epsilon(svm_tube_epsilon);
00519 mkl->set_nu(svm_nu);
00520 mkl->set_C(svm_C1, svm_C2);
00521 mkl->set_qpsize(svm_qpsize);
00522 mkl->set_shrinking_enabled(svm_use_shrinking);
00523 mkl->set_linadd_enabled(svm_use_linadd);
00524 mkl->set_batch_computation_enabled(svm_use_batch_computation);
00525 mkl->set_mkl_epsilon(svm_weight_epsilon);
00526 mkl->set_mkl_norm(mkl_norm);
00527 mkl->set_elasticnet_lambda(ent_lambda);
00528 mkl->set_mkl_block_norm(mkl_block_norm);
00529 mkl->set_C_mkl(C_mkl);
00530 mkl->set_interleaved_optimization_enabled(mkl_use_interleaved);
00531
00532 if (svm_do_auc_maximization)
00533 {
00534 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00535 CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00536 ((CKernelMachine*) mkl)->set_labels(auc_labels);
00537 ((CKernelMachine*) mkl)->set_kernel(auc_kernel);
00538 SG_UNREF(auc_labels);
00539 }
00540 else
00541 {
00542 if(!oneclass)
00543 ((CKernelMachine*) mkl)->set_labels(trainlabels);
00544 ((CKernelMachine*) mkl)->set_kernel(kernel);
00545 }
00546
00547 bool result=mkl->train();
00548
00549 return result;
00550 }
00551
00552 bool CGUIClassifier::train_svm()
00553 {
00554 EMachineType type = classifier->get_classifier_type();
00555
00556 if (!classifier)
00557 SG_ERROR("No SVM available.\n");
00558
00559 bool oneclass=(type==CT_LIBSVMONECLASS);
00560 CLabels* trainlabels=NULL;
00561 if(!oneclass)
00562 trainlabels=ui->ui_labels->get_train_labels();
00563 else
00564 SG_INFO("Training one class svm.\n");
00565 if (!trainlabels && !oneclass)
00566 SG_ERROR("No trainlabels available.\n");
00567
00568 CKernel* kernel=ui->ui_kernel->get_kernel();
00569 if (!kernel)
00570 SG_ERROR("No kernel available.\n");
00571
00572 bool success=ui->ui_kernel->init_kernel("TRAIN");
00573
00574 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00575 SG_ERROR("Kernel not initialized / no train features available.\n");
00576
00577 int32_t num_vec=kernel->get_num_vec_lhs();
00578 if (!oneclass && trainlabels->get_num_labels() != num_vec)
00579 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00580
00581 SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00582
00583 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS)
00584 {
00585 CMulticlassSVM* svm = (CMulticlassSVM*)classifier;
00586 svm->set_solver_type(solver_type);
00587 svm->set_bias_enabled(svm_use_bias);
00588 svm->set_epsilon(svm_epsilon);
00589 svm->set_max_train_time(max_train_time);
00590 svm->set_tube_epsilon(svm_tube_epsilon);
00591 svm->set_nu(svm_nu);
00592 svm->set_C(svm_C1);
00593 svm->set_qpsize(svm_qpsize);
00594 svm->set_shrinking_enabled(svm_use_shrinking);
00595 svm->set_linadd_enabled(svm_use_linadd);
00596 svm->set_batch_computation_enabled(svm_use_batch_computation);
00597 }
00598 else
00599 {
00600 CSVM* svm = (CSVM*)classifier;
00601 svm->set_solver_type(solver_type);
00602 svm->set_bias_enabled(svm_use_bias);
00603 svm->set_epsilon(svm_epsilon);
00604 svm->set_max_train_time(max_train_time);
00605 svm->set_tube_epsilon(svm_tube_epsilon);
00606 svm->set_nu(svm_nu);
00607 svm->set_C(svm_C1, svm_C2);
00608 svm->set_qpsize(svm_qpsize);
00609 svm->set_shrinking_enabled(svm_use_shrinking);
00610 svm->set_linadd_enabled(svm_use_linadd);
00611 svm->set_batch_computation_enabled(svm_use_batch_computation);
00612 }
00613
00614 if (type==CT_MKLMULTICLASS)
00615 {
00616 ((CMKLMulticlass *)classifier)->set_mkl_epsilon(svm_weight_epsilon);
00617 }
00618
00619 if (svm_do_auc_maximization)
00620 {
00621 CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00622 CLabels* auc_labels = auc_kernel->setup_auc_maximization(trainlabels);
00623 ((CKernelMachine*)classifier)->set_labels(auc_labels);
00624 ((CKernelMachine*)classifier)->set_kernel(auc_kernel);
00625 SG_UNREF(auc_labels);
00626 }
00627 else
00628 {
00629 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS)
00630 {
00631 ((CKernelMulticlassMachine*)classifier)->set_labels(trainlabels);
00632 ((CKernelMulticlassMachine*)classifier)->set_kernel(kernel);
00633 }
00634 else
00635 {
00636 if(!oneclass)
00637 ((CKernelMachine*)classifier)->set_labels(trainlabels);
00638
00639 ((CKernelMachine*)classifier)->set_kernel(kernel);
00640 }
00641 }
00642
00643 bool result = classifier->train();
00644
00645 return result;
00646 }
00647
00648 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter)
00649 {
00650 bool result=false;
00651 CDistance* distance=ui->ui_distance->get_distance();
00652
00653 if (!distance)
00654 SG_ERROR("No distance available\n");
00655
00656 if (!ui->ui_distance->init_distance("TRAIN"))
00657 SG_ERROR("Initializing distance with train features failed.\n");
00658
00659 ((CDistanceMachine*) classifier)->set_distance(distance);
00660
00661 EMachineType type=classifier->get_classifier_type();
00662 switch (type)
00663 {
00664 case CT_KMEANS:
00665 {
00666 ((CKMeans*) classifier)->set_k(k);
00667 ((CKMeans*) classifier)->set_max_iter(max_iter);
00668 result=((CKMeans*) classifier)->train();
00669 break;
00670 }
00671 case CT_HIERARCHICAL:
00672 {
00673 ((CHierarchical*) classifier)->set_merges(k);
00674 result=((CHierarchical*) classifier)->train();
00675 break;
00676 }
00677 default:
00678 SG_ERROR("Unknown clustering type %d\n", type);
00679 }
00680
00681 return result;
00682 }
00683
00684 bool CGUIClassifier::train_knn(int32_t k)
00685 {
00686 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00687 CDistance* distance=ui->ui_distance->get_distance();
00688
00689 bool result=false;
00690
00691 if (trainlabels)
00692 {
00693 if (distance)
00694 {
00695 if (!ui->ui_distance->init_distance("TRAIN"))
00696 SG_ERROR("Initializing distance with train features failed.\n");
00697 ((CKNN*) classifier)->set_labels(trainlabels);
00698 ((CKNN*) classifier)->set_distance(distance);
00699 ((CKNN*) classifier)->set_k(k);
00700 result=((CKNN*) classifier)->train();
00701 }
00702 else
00703 SG_ERROR("No distance available.\n");
00704 }
00705 else
00706 SG_ERROR("No labels available\n");
00707
00708 return result;
00709 }
00710
00711 bool CGUIClassifier::train_krr()
00712 {
00713 #ifdef HAVE_LAPACK
00714 CKernelRidgeRegression* krr= (CKernelRidgeRegression*) classifier;
00715 if (!krr)
00716 SG_ERROR("No SVM available.\n");
00717
00718 CLabels* trainlabels=NULL;
00719 trainlabels=ui->ui_labels->get_train_labels();
00720 if (!trainlabels)
00721 SG_ERROR("No trainlabels available.\n");
00722
00723 CKernel* kernel=ui->ui_kernel->get_kernel();
00724 if (!kernel)
00725 SG_ERROR("No kernel available.\n");
00726
00727 bool success=ui->ui_kernel->init_kernel("TRAIN");
00728
00729 if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00730 SG_ERROR("Kernel not initialized / no train features available.\n");
00731
00732 int32_t num_vec=kernel->get_num_vec_lhs();
00733 if (trainlabels->get_num_labels() != num_vec)
00734 SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00735
00736
00737
00738 krr->set_labels(trainlabels);
00739 krr->set_kernel(kernel);
00740
00741 bool result=krr->train();
00742 return result;
00743 #else
00744 return false;
00745 #endif
00746 }
00747
00748 bool CGUIClassifier::train_linear(float64_t gamma)
00749 {
00750 ASSERT(classifier);
00751 EMachineType ctype = classifier->get_classifier_type();
00752 CFeatures* trainfeatures=ui->ui_features->get_train_features();
00753 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00754 bool result=false;
00755
00756 if (!trainfeatures)
00757 SG_ERROR("No trainfeatures available.\n");
00758
00759 if (!trainfeatures->has_property(FP_DOT))
00760 SG_ERROR("Trainfeatures not based on DotFeatures.\n");
00761
00762 if (!trainlabels)
00763 SG_ERROR("No labels available\n");
00764
00765 if (ctype==CT_PERCEPTRON)
00766 {
00767 ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate);
00768 ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter);
00769 }
00770
00771 #ifdef HAVE_LAPACK
00772 if (ctype==CT_LDA)
00773 {
00774 if (trainfeatures->get_feature_type()!=F_DREAL ||
00775 trainfeatures->get_feature_class()!=C_DENSE)
00776 SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n");
00777 ((CLDA*) classifier)->set_gamma(gamma);
00778 }
00779 #endif
00780
00781 if (ctype==CT_SVMOCAS)
00782 ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00783 #ifdef HAVE_LAPACK
00784 else if (ctype==CT_LIBLINEAR)
00785 ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00786 #endif
00787 else if (ctype==CT_SVMLIN)
00788 ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00789 else if (ctype==CT_SVMSGD)
00790 ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2);
00791 else if (ctype==CT_SUBGRADIENTSVM)
00792 ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00793
00794 else if (ctype==CT_LPM || ctype==CT_LPBOOST)
00795 {
00796 if (trainfeatures->get_feature_class()!=C_SPARSE ||
00797 trainfeatures->get_feature_type()!=F_DREAL)
00798 SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n");
00799 }
00800
00801 ((CLinearMachine*) classifier)->set_labels(trainlabels);
00802 ((CLinearMachine*) classifier)->set_features((CDenseFeatures<float64_t>*) trainfeatures);
00803 result=((CLinearMachine*) classifier)->train();
00804
00805 return result;
00806 }
00807
00808 bool CGUIClassifier::train_wdocas()
00809 {
00810 CFeatures* trainfeatures=ui->ui_features->get_train_features();
00811 CLabels* trainlabels=ui->ui_labels->get_train_labels();
00812
00813 bool result=false;
00814
00815 if (!trainfeatures)
00816 SG_ERROR("No trainfeatures available.\n");
00817
00818 if (trainfeatures->get_feature_class()!=C_STRING ||
00819 trainfeatures->get_feature_type()!=F_BYTE )
00820 SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n");
00821
00822 if (!trainlabels)
00823 SG_ERROR("No labels available.\n");
00824
00825 ((CWDSVMOcas*) classifier)->set_labels(trainlabels);
00826 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures);
00827 result=((CWDSVMOcas*) classifier)->train();
00828
00829 return result;
00830 }
00831
00832 bool CGUIClassifier::load(char* filename, char* type)
00833 {
00834 bool result=false;
00835
00836 if (new_classifier(type))
00837 {
00838 FILE* model_file=fopen(filename, "r");
00839 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(model_file,'r');
00840
00841 if (ascii_file)
00842 {
00843 if (classifier && classifier->load_serializable(ascii_file))
00844 {
00845 SG_DEBUG("file successfully read.\n");
00846 result=true;
00847 }
00848 else
00849 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename);
00850
00851 delete ascii_file;
00852 }
00853 else
00854 SG_ERROR("Opening file %s failed.\n", filename);
00855
00856 return result;
00857 }
00858 else
00859 SG_ERROR("Type %s of SVM/Classifier unknown.\n", type);
00860
00861 return false;
00862 }
00863
00864 bool CGUIClassifier::save(char* param)
00865 {
00866 bool result=false;
00867 param=SGIO::skip_spaces(param);
00868
00869 if (classifier)
00870 {
00871 FILE* file=fopen(param, "w");
00872 CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(file,'w');
00873
00874 if ((!ascii_file) || (!classifier->save_serializable(ascii_file)))
00875 printf("writing to file %s failed!\n", param);
00876 else
00877 {
00878 printf("successfully written classifier into \"%s\" !\n", param);
00879 result=true;
00880 }
00881
00882 if (ascii_file)
00883 delete ascii_file;
00884 }
00885 else
00886 SG_ERROR("create classifier first\n");
00887
00888 return result;
00889 }
00890
00891 bool CGUIClassifier::set_perceptron_parameters(
00892 float64_t learnrate, int32_t maxiter)
00893 {
00894 if (learnrate<=0)
00895 perceptron_learnrate=0.01;
00896 else
00897 perceptron_learnrate=learnrate;
00898
00899 if (maxiter<=0)
00900 perceptron_maxiter=1000;
00901 else
00902 perceptron_maxiter=maxiter;
00903 SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter);
00904
00905 return true;
00906 }
00907
00908 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon)
00909 {
00910 if (epsilon<0)
00911 svm_epsilon=1e-4;
00912 else
00913 svm_epsilon=epsilon;
00914 SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon);
00915
00916 return true;
00917 }
00918
00919 bool CGUIClassifier::set_max_train_time(float64_t max)
00920 {
00921 if (max>0)
00922 {
00923 max_train_time=max;
00924 SG_INFO("Set to max_train_time=%f.\n", max_train_time);
00925 }
00926 else
00927 SG_INFO("Disabling max_train_time.\n");
00928
00929 return true;
00930 }
00931
00932 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon)
00933 {
00934 if (!classifier)
00935 SG_ERROR("No regression method allocated\n");
00936
00937 if (classifier->get_classifier_type() != CT_LIBSVR &&
00938 classifier->get_classifier_type() != CT_SVRLIGHT &&
00939 classifier->get_classifier_type() != CT_MKLREGRESSION )
00940 {
00941 SG_ERROR("Underlying method not capable of SV-regression\n");
00942 }
00943
00944 if (tube_epsilon<0)
00945 svm_tube_epsilon=1e-2;
00946 svm_tube_epsilon=tube_epsilon;
00947
00948 ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon);
00949 SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon);
00950
00951 return true;
00952 }
00953
00954 bool CGUIClassifier::set_svm_nu(float64_t nu)
00955 {
00956 if (nu<0 || nu>1)
00957 nu=0.5;
00958
00959 svm_nu=nu;
00960 SG_INFO("Set to nu=%f.\n", svm_nu);
00961
00962 return true;
00963 }
00964
00965 bool CGUIClassifier::set_svm_mkl_parameters(
00966 float64_t weight_epsilon, float64_t C, float64_t norm)
00967 {
00968 if (weight_epsilon<0)
00969 weight_epsilon=1e-4;
00970 if (C<0)
00971 C=0;
00972 if (norm<0)
00973 SG_ERROR("MKL norm >= 0\n");
00974
00975 svm_weight_epsilon=weight_epsilon;
00976 C_mkl=C;
00977 mkl_norm=norm;
00978
00979 SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon);
00980 SG_INFO("Set to C_mkl=%f.\n", C_mkl);
00981 SG_INFO("Set to mkl_norm=%f.\n", mkl_norm);
00982
00983 return true;
00984 }
00985
00986 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda)
00987 {
00988 if (lambda<0 || lambda>1)
00989 SG_ERROR("0 <= ent_lambda <= 1\n");
00990
00991 ent_lambda = lambda;
00992 return true;
00993 }
00994
00995 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm)
00996 {
00997 if (mkl_bnorm<1)
00998 SG_ERROR("1 <= mkl_block_norm <= inf\n");
00999
01000 mkl_block_norm=mkl_bnorm;
01001 return true;
01002 }
01003
01004
01005 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2)
01006 {
01007 if (C1<0)
01008 svm_C1=1.0;
01009 else
01010 svm_C1=C1;
01011
01012 if (C2<0)
01013 svm_C2=svm_C1;
01014 else
01015 svm_C2=C2;
01016
01017 SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2);
01018
01019 return true;
01020 }
01021
01022 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize)
01023 {
01024 if (qpsize<2)
01025 svm_qpsize=41;
01026 else
01027 svm_qpsize=qpsize;
01028 SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize);
01029
01030 return true;
01031 }
01032
01033 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize)
01034 {
01035 if (max_qpsize<50)
01036 svm_max_qpsize=50;
01037 else
01038 svm_max_qpsize=max_qpsize;
01039 SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize);
01040
01041 return true;
01042 }
01043
01044 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize)
01045 {
01046 if (svm_bufsize<0)
01047 svm_bufsize=3000;
01048 else
01049 svm_bufsize=bufsize;
01050 SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize);
01051
01052 return true ;
01053 }
01054
01055 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled)
01056 {
01057 svm_use_shrinking=enabled;
01058 if (svm_use_shrinking)
01059 SG_INFO("Enabling shrinking optimization.\n");
01060 else
01061 SG_INFO("Disabling shrinking optimization.\n");
01062
01063 return true;
01064 }
01065
01066 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled)
01067 {
01068 svm_use_batch_computation=enabled;
01069 if (svm_use_batch_computation)
01070 SG_INFO("Enabling batch computation.\n");
01071 else
01072 SG_INFO("Disabling batch computation.\n");
01073
01074 return true;
01075 }
01076
01077 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled)
01078 {
01079 svm_use_linadd=enabled;
01080 if (svm_use_linadd)
01081 SG_INFO("Enabling LINADD optimization.\n");
01082 else
01083 SG_INFO("Disabling LINADD optimization.\n");
01084
01085 return true;
01086 }
01087
01088 bool CGUIClassifier::set_svm_bias_enabled(bool enabled)
01089 {
01090 svm_use_bias=enabled;
01091 if (svm_use_bias)
01092 SG_INFO("Enabling svm bias.\n");
01093 else
01094 SG_INFO("Disabling svm bias.\n");
01095
01096 return true;
01097 }
01098
01099 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled)
01100 {
01101 mkl_use_interleaved=enabled;
01102 if (mkl_use_interleaved)
01103 SG_INFO("Enabling mkl interleaved optimization.\n");
01104 else
01105 SG_INFO("Disabling mkl interleaved optimization.\n");
01106
01107 return true;
01108 }
01109
01110 bool CGUIClassifier::set_do_auc_maximization(bool do_auc)
01111 {
01112 svm_do_auc_maximization=do_auc;
01113
01114 if (svm_do_auc_maximization)
01115 SG_INFO("Enabling AUC maximization.\n");
01116 else
01117 SG_INFO("Disabling AUC maximization.\n");
01118
01119 return true;
01120 }
01121
01122
01123 CLabels* CGUIClassifier::classify()
01124 {
01125 ASSERT(classifier);
01126
01127 switch (classifier->get_classifier_type())
01128 {
01129 case CT_LIGHT:
01130 case CT_LIGHTONECLASS:
01131 case CT_LIBSVM:
01132 case CT_SCATTERSVM:
01133 case CT_MPD:
01134 case CT_GPBT:
01135 case CT_CPLEXSVM:
01136 case CT_GMNPSVM:
01137 case CT_GNPPSVM:
01138 case CT_LIBSVR:
01139 case CT_LIBSVMMULTICLASS:
01140 case CT_LIBSVMONECLASS:
01141 case CT_SVRLIGHT:
01142 case CT_MKLCLASSIFICATION:
01143 case CT_MKLMULTICLASS:
01144 case CT_MKLREGRESSION:
01145 case CT_MKLONECLASS:
01146 case CT_KERNELRIDGEREGRESSION:
01147 return classify_kernelmachine();
01148 case CT_KNN:
01149 return classify_distancemachine();
01150 case CT_PERCEPTRON:
01151 case CT_LDA:
01152 return classify_linear();
01153 case CT_SVMLIN:
01154 case CT_SVMPERF:
01155 case CT_SUBGRADIENTSVM:
01156 case CT_SVMOCAS:
01157 case CT_SVMSGD:
01158 case CT_LPM:
01159 case CT_LPBOOST:
01160 case CT_SUBGRADIENTLPM:
01161 case CT_LIBLINEAR:
01162 return classify_linear();
01163 case CT_WDSVMOCAS:
01164 return classify_byte_linear();
01165 default:
01166 SG_ERROR("unknown classifier type\n");
01167 break;
01168 };
01169
01170 return NULL;
01171 }
01172
01173 CLabels* CGUIClassifier::classify_kernelmachine()
01174 {
01175 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01176 CFeatures* testfeatures=ui->ui_features->get_test_features();
01177
01178 if (!classifier)
01179 SG_ERROR("No kernelmachine available.\n");
01180
01181 bool success=true;
01182
01183 if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
01184 {
01185 if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED
01186 && ( !trainfeatures || !testfeatures ))
01187 {
01188 SG_DEBUG("skipping initialisation of combined kernel "
01189 "as train/test features are unavailable\n");
01190 }
01191 else
01192 {
01193 if (!trainfeatures)
01194 SG_ERROR("No training features available.\n");
01195 if (!testfeatures)
01196 SG_ERROR("No test features available.\n");
01197
01198 success=ui->ui_kernel->init_kernel("TEST");
01199 }
01200 }
01201
01202 if (!success || !ui->ui_kernel->is_initialized())
01203 SG_ERROR("Kernel not initialized.\n");
01204
01205 EMachineType type = classifier->get_classifier_type();
01206 if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS ||
01207 type==CT_MKLMULTICLASS)
01208 {
01209 CKernelMulticlassMachine* kmcm = (CKernelMulticlassMachine*) classifier;
01210 kmcm->set_kernel(ui->ui_kernel->get_kernel());
01211 }
01212 else
01213 {
01214 CKernelMachine* km=(CKernelMachine*) classifier;
01215 km->set_kernel(ui->ui_kernel->get_kernel());
01216 km->set_batch_computation_enabled(svm_use_batch_computation);
01217 }
01218
01219 SG_INFO("Starting kernel machine testing.\n");
01220 return classifier->apply();
01221 }
01222
01223 bool CGUIClassifier::get_trained_classifier(
01224 float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias,
01225 int32_t& brows, int32_t& bcols,
01226 int32_t idx)
01227 {
01228 ASSERT(classifier);
01229
01230 switch (classifier->get_classifier_type())
01231 {
01232 case CT_SCATTERSVM:
01233 case CT_GNPPSVM:
01234 case CT_LIBSVMMULTICLASS:
01235 case CT_LIGHT:
01236 case CT_LIGHTONECLASS:
01237 case CT_LIBSVM:
01238 case CT_MPD:
01239 case CT_GPBT:
01240 case CT_CPLEXSVM:
01241 case CT_GMNPSVM:
01242 case CT_LIBSVR:
01243 case CT_LIBSVMONECLASS:
01244 case CT_SVRLIGHT:
01245 case CT_MKLCLASSIFICATION:
01246 case CT_MKLREGRESSION:
01247 case CT_MKLONECLASS:
01248 case CT_MKLMULTICLASS:
01249 case CT_KERNELRIDGEREGRESSION:
01250 return get_svm(weights, rows, cols, bias, brows, bcols, idx);
01251 break;
01252 case CT_PERCEPTRON:
01253 case CT_LDA:
01254 case CT_LPM:
01255 case CT_LPBOOST:
01256 case CT_SUBGRADIENTLPM:
01257 case CT_SVMOCAS:
01258 case CT_SVMSGD:
01259 case CT_SVMLIN:
01260 case CT_SVMPERF:
01261 case CT_SUBGRADIENTSVM:
01262 case CT_LIBLINEAR:
01263 return get_linear(weights, rows, cols, bias, brows, bcols);
01264 break;
01265 case CT_KMEANS:
01266 case CT_HIERARCHICAL:
01267 return get_clustering(weights, rows, cols, bias, brows, bcols);
01268 break;
01269 case CT_KNN:
01270 SG_ERROR("not implemented");
01271 break;
01272 default:
01273 SG_ERROR("unknown classifier type\n");
01274 break;
01275 };
01276 return false;
01277 }
01278
01279
01280 int32_t CGUIClassifier::get_num_svms()
01281 {
01282 ASSERT(classifier);
01283 return ((CMulticlassSVM*) classifier)->get_num_machines();
01284 }
01285
01286 bool CGUIClassifier::get_svm(
01287 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01288 int32_t& brows, int32_t& bcols, int32_t idx)
01289 {
01290 CSVM* svm=(CSVM*) classifier;
01291
01292 if (idx>-1)
01293 svm=((CMulticlassSVM*) svm)->get_svm(idx);
01294
01295 if (svm)
01296 {
01297 brows=1;
01298 bcols=1;
01299 bias=SG_MALLOC(float64_t, 1);
01300 *bias=svm->get_bias();
01301
01302 rows=svm->get_num_support_vectors();
01303 cols=2;
01304 weights=SG_MALLOC(float64_t, rows*cols);
01305
01306 for (int32_t i=0; i<rows; i++)
01307 {
01308 weights[i]=svm->get_alpha(i);
01309 weights[i+rows]=svm->get_support_vector(i);
01310 }
01311
01312 return true;
01313 }
01314
01315 return false;
01316 }
01317
01318 bool CGUIClassifier::get_clustering(
01319 float64_t* ¢ers, int32_t& rows, int32_t& cols, float64_t*& radi,
01320 int32_t& brows, int32_t& bcols)
01321 {
01322 if (!classifier)
01323 return false;
01324
01325 switch (classifier->get_classifier_type())
01326 {
01327 case CT_KMEANS:
01328 {
01329 CKMeans* clustering=(CKMeans*) classifier;
01330
01331 bcols=1;
01332 SGVector<float64_t> r=clustering->get_radiuses();
01333 brows=r.vlen;
01334 radi=SG_MALLOC(float64_t, brows);
01335 memcpy(radi, r.vector, sizeof(float64_t)*brows);
01336
01337 cols=1;
01338 SGMatrix<float64_t> c=clustering->get_cluster_centers();
01339 rows=c.num_rows;
01340 cols=c.num_cols;
01341 centers=SG_MALLOC(float64_t, rows*cols);
01342 memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols);
01343 break;
01344 }
01345
01346 case CT_HIERARCHICAL:
01347 {
01348 CHierarchical* clustering=(CHierarchical*) classifier;
01349
01350
01351 bcols=1;
01352 SGVector<float64_t> r=clustering->get_merge_distances();
01353 brows=r.vlen;
01354 radi=SG_MALLOC(float64_t, brows);
01355 memcpy(radi, r.vector, sizeof(float64_t)*brows);
01356
01357 SGMatrix<int32_t> p=clustering->get_cluster_pairs();
01358 rows=p.num_rows;
01359 cols=p.num_cols;
01360 centers=SG_MALLOC(float64_t, rows*cols);
01361 for (int32_t i=0; i<rows*cols; i++)
01362 centers[i]=(float64_t) p.matrix[i];
01363
01364 break;
01365 }
01366
01367 default:
01368 SG_ERROR("internal error - unknown clustering type\n");
01369 }
01370
01371 return true;
01372 }
01373
01374 bool CGUIClassifier::get_linear(
01375 float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01376 int32_t& brows, int32_t& bcols)
01377 {
01378 CLinearMachine* linear=(CLinearMachine*) classifier;
01379
01380 if (!linear)
01381 return false;
01382
01383 bias=SG_MALLOC(float64_t, 1);
01384 *bias=linear->get_bias();
01385 brows=1;
01386 bcols=1;
01387
01388 SGVector<float64_t> w=linear->get_w();
01389 cols=1;
01390 rows=w.vlen;
01391
01392 weights= SG_MALLOC(float64_t, w.vlen);
01393 memcpy(weights, w.vector, sizeof(float64_t)*w.vlen);
01394
01395 return true;
01396 }
01397
01398 CLabels* CGUIClassifier::classify_distancemachine()
01399 {
01400 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01401 CFeatures* testfeatures=ui->ui_features->get_test_features();
01402
01403 if (!classifier)
01404 {
01405 SG_ERROR("no kernelmachine available\n") ;
01406 return NULL;
01407 }
01408 if (!trainfeatures)
01409 {
01410 SG_ERROR("no training features available\n") ;
01411 return NULL;
01412 }
01413
01414 if (!testfeatures)
01415 {
01416 SG_ERROR("no test features available\n") ;
01417 return NULL;
01418 }
01419
01420 bool success=ui->ui_distance->init_distance("TEST");
01421
01422 if (!success || !ui->ui_distance->is_initialized())
01423 {
01424 SG_ERROR("distance not initialized\n") ;
01425 return NULL;
01426 }
01427
01428 ((CDistanceMachine*) classifier)->set_distance(
01429 ui->ui_distance->get_distance());
01430 SG_INFO("starting distance machine testing\n") ;
01431 return classifier->apply();
01432 }
01433
01434
01435 CLabels* CGUIClassifier::classify_linear()
01436 {
01437 CFeatures* testfeatures=ui->ui_features->get_test_features();
01438
01439 if (!classifier)
01440 {
01441 SG_ERROR("no classifier available\n") ;
01442 return NULL;
01443 }
01444 if (!testfeatures)
01445 {
01446 SG_ERROR("no test features available\n") ;
01447 return NULL;
01448 }
01449 if (!(testfeatures->has_property(FP_DOT)))
01450 {
01451 SG_ERROR("testfeatures not based on DotFeatures\n") ;
01452 return NULL;
01453 }
01454
01455 ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures);
01456 SG_INFO("starting linear classifier testing\n") ;
01457 return classifier->apply();
01458 }
01459
01460 CLabels* CGUIClassifier::classify_byte_linear()
01461 {
01462 CFeatures* testfeatures=ui->ui_features->get_test_features();
01463
01464 if (!classifier)
01465 {
01466 SG_ERROR("no svm available\n") ;
01467 return NULL;
01468 }
01469 if (!testfeatures)
01470 {
01471 SG_ERROR("no test features available\n") ;
01472 return NULL;
01473 }
01474 if (testfeatures->get_feature_class() != C_STRING ||
01475 testfeatures->get_feature_type() != F_BYTE )
01476 {
01477 SG_ERROR("testfeatures not of class STRING type BYTE\n") ;
01478 return NULL;
01479 }
01480
01481 ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures);
01482 SG_INFO("starting linear classifier testing\n") ;
01483 return classifier->apply();
01484 }
01485
01486 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result)
01487 {
01488 CFeatures* trainfeatures=ui->ui_features->get_train_features();
01489 CFeatures* testfeatures=ui->ui_features->get_test_features();
01490
01491 if (!classifier)
01492 {
01493 SG_ERROR("no svm available\n") ;
01494 return false;
01495 }
01496
01497 if (!ui->ui_kernel->is_initialized())
01498 {
01499 SG_ERROR("kernel not initialized\n") ;
01500 return false;
01501 }
01502
01503 if (!ui->ui_kernel->get_kernel() ||
01504 !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
01505 {
01506 if (!trainfeatures)
01507 {
01508 SG_ERROR("no training features available\n") ;
01509 return false;
01510 }
01511
01512 if (!testfeatures)
01513 {
01514 SG_ERROR("no test features available\n") ;
01515 return false;
01516 }
01517 }
01518
01519 ((CKernelMachine*) classifier)->set_kernel(
01520 ui->ui_kernel->get_kernel());
01521
01522 result=((CKernelMachine*)classifier)->apply_one(idx);
01523 return true ;
01524 }
01525
01526
01527 bool CGUIClassifier::set_krr_tau(float64_t tau)
01528 {
01529 #ifdef HAVE_LAPACK
01530 krr_tau=tau;
01531 ((CKernelRidgeRegression*) classifier)->set_tau(krr_tau);
01532 SG_INFO("Set to krr_tau=%f.\n", krr_tau);
01533
01534 return true;
01535 #else
01536 return false;
01537 #endif
01538 }
01539
01540 bool CGUIClassifier::set_solver(char* solver)
01541 {
01542 ESolverType s=ST_AUTO;
01543
01544 if (strncmp(solver,"NEWTON", 6)==0)
01545 {
01546 SG_INFO("Using NEWTON solver.\n");
01547 s=ST_NEWTON;
01548 }
01549 else if (strncmp(solver,"DIRECT", 6)==0)
01550 {
01551 SG_INFO("Using DIRECT solver\n");
01552 s=ST_DIRECT;
01553 }
01554 else if (strncmp(solver,"BLOCK_NORM", 9)==0)
01555 {
01556 SG_INFO("Using BLOCK_NORM solver\n");
01557 s=ST_BLOCK_NORM;
01558 }
01559 else if (strncmp(solver,"ELASTICNET", 10)==0)
01560 {
01561 SG_INFO("Using ELASTICNET solver\n");
01562 s=ST_ELASTICNET;
01563 }
01564 else if (strncmp(solver,"AUTO", 4)==0)
01565 {
01566 SG_INFO("Automagically determining solver.\n");
01567 s=ST_AUTO;
01568 }
01569 #ifdef USE_CPLEX
01570 else if (strncmp(solver, "CPLEX", 5)==0)
01571 {
01572 SG_INFO("USING CPLEX METHOD selected\n");
01573 s=ST_CPLEX;
01574 }
01575 #endif
01576 #ifdef USE_GLPK
01577 else if (strncmp(solver,"GLPK", 4)==0)
01578 {
01579 SG_INFO("Using GLPK solver\n");
01580 s=ST_GLPK;
01581 }
01582 #endif
01583 else
01584 SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver);
01585
01586
01587 solver_type=s;
01588 return true;
01589 }
01590
01591 bool CGUIClassifier::set_constraint_generator(char* name)
01592 {
01593 if (strcmp(name,"LIBSVM_ONECLASS")==0)
01594 {
01595 SG_UNREF(constraint_generator);
01596 constraint_generator = new CLibSVMOneClass();
01597 SG_INFO("created SVMlibsvm object for oneclass\n");
01598 }
01599 else if (strcmp(name,"LIBSVM_NU")==0)
01600 {
01601 SG_UNREF(constraint_generator);
01602 constraint_generator= new CLibSVM(LIBSVM_NU_SVC);
01603 SG_INFO("created SVMlibsvm object\n") ;
01604 }
01605 else if (strcmp(name,"LIBSVM")==0)
01606 {
01607 SG_UNREF(constraint_generator);
01608 constraint_generator= new CLibSVM();
01609 SG_INFO("created SVMlibsvm object\n") ;
01610 }
01611 #ifdef USE_SVMLIGHT
01612 else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
01613 {
01614 SG_UNREF(constraint_generator);
01615 constraint_generator= new CSVMLight();
01616 SG_INFO("created SVMLight object\n") ;
01617 }
01618 else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
01619 {
01620 SG_UNREF(constraint_generator);
01621 constraint_generator= new CSVMLightOneClass();
01622 SG_INFO("created SVMLightOneClass object\n") ;
01623 }
01624 else if (strcmp(name,"SVRLIGHT")==0)
01625 {
01626 SG_UNREF(constraint_generator);
01627 constraint_generator= new CSVRLight();
01628 SG_INFO("created SVRLight object\n") ;
01629 }
01630 #endif //USE_SVMLIGHT
01631 else if (strcmp(name,"GPBTSVM")==0)
01632 {
01633 SG_UNREF(constraint_generator);
01634 constraint_generator= new CGPBTSVM();
01635 SG_INFO("created GPBT-SVM object\n") ;
01636 }
01637 else if (strcmp(name,"MPDSVM")==0)
01638 {
01639 SG_UNREF(constraint_generator);
01640 constraint_generator= new CMPDSVM();
01641 SG_INFO("created MPD-SVM object\n") ;
01642 }
01643 else if (strcmp(name,"GNPPSVM")==0)
01644 {
01645 SG_UNREF(constraint_generator);
01646 constraint_generator= new CGNPPSVM();
01647 SG_INFO("created GNPP-SVM object\n") ;
01648 }
01649 else if (strcmp(name,"LIBSVR")==0)
01650 {
01651 SG_UNREF(constraint_generator);
01652 constraint_generator= new CLibSVR();
01653 SG_INFO("created SVRlibsvm object\n") ;
01654 }
01655 else
01656 {
01657 SG_ERROR("Unknown SV-classifier %s.\n", name);
01658 return false;
01659 }
01660 SG_REF(constraint_generator);
01661
01662 return (constraint_generator!=NULL);
01663 }