GUIClassifier.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 #include <shogun/ui/GUIClassifier.h>
00012 #include <shogun/ui/SGInterface.h>
00013 
00014 #include <shogun/lib/config.h>
00015 #include <shogun/io/SGIO.h>
00016 
00017 #include <shogun/features/SparseFeatures.h>
00018 #include <shogun/features/RealFileFeatures.h>
00019 #include <shogun/labels/Labels.h>
00020 
00021 #include <shogun/kernel/AUCKernel.h>
00022 
00023 #include <shogun/multiclass/KNN.h>
00024 #include <shogun/clustering/KMeans.h>
00025 #include <shogun/clustering/Hierarchical.h>
00026 #include <shogun/classifier/PluginEstimate.h>
00027 
00028 #include <shogun/classifier/LDA.h>
00029 #include <shogun/classifier/LPM.h>
00030 #include <shogun/classifier/LPBoost.h>
00031 #include <shogun/classifier/Perceptron.h>
00032 
00033 #include <shogun/machine/LinearMachine.h>
00034 
00035 #ifdef USE_SVMLIGHT
00036 #include <shogun/classifier/svm/SVMLight.h>
00037 #include <shogun/classifier/svm/SVMLightOneClass.h>
00038 #include <shogun/regression/svr/SVRLight.h>
00039 #endif //USE_SVMLIGHT
00040 
00041 #include <shogun/classifier/mkl/MKLClassification.h>
00042 #include <shogun/regression/svr/MKLRegression.h>
00043 #include <shogun/classifier/mkl/MKLOneClass.h>
00044 #include <shogun/classifier/mkl/MKLMulticlass.h>
00045 #include <shogun/classifier/svm/LibSVM.h>
00046 #include <shogun/multiclass/LaRank.h>
00047 #include <shogun/classifier/svm/GPBTSVM.h>
00048 #include <shogun/classifier/svm/LibSVMOneClass.h>
00049 #include <shogun/multiclass/MulticlassLibSVM.h>
00050 
00051 #include <shogun/regression/svr/LibSVR.h>
00052 #include <shogun/regression/KernelRidgeRegression.h>
00053 
00054 #include <shogun/classifier/svm/LibLinear.h>
00055 #include <shogun/classifier/svm/MPDSVM.h>
00056 #include <shogun/classifier/svm/GNPPSVM.h>
00057 #include <shogun/multiclass/GMNPSVM.h>
00058 #include <shogun/multiclass/ScatterSVM.h>
00059 
00060 #include <shogun/classifier/svm/SVMLin.h>
00061 #include <shogun/classifier/svm/SubGradientSVM.h>
00062 #include <shogun/classifier/SubGradientLPM.h>
00063 #include <shogun/classifier/svm/SVMOcas.h>
00064 #include <shogun/classifier/svm/SVMSGD.h>
00065 #include <shogun/classifier/svm/WDSVMOcas.h>
00066 
00067 #include <shogun/io/SerializableAsciiFile.h>
00068 
00069 using namespace shogun;
00070 
00071 CGUIClassifier::CGUIClassifier(CSGInterface* ui_)
00072 : CSGObject(), ui(ui_)
00073 {
00074     constraint_generator=NULL;
00075     classifier=NULL;
00076     max_train_time=0;
00077 
00078     // Perceptron parameters
00079     perceptron_learnrate=0.1;
00080     perceptron_maxiter=1000;
00081 
00082     // SVM parameters
00083     svm_qpsize=41;
00084     svm_bufsize=3000;
00085     svm_max_qpsize=1000;
00086     mkl_norm=1;
00087     ent_lambda=0;
00088     mkl_block_norm=4;
00089     svm_C1=1;
00090     svm_C2=1;
00091     C_mkl=0;
00092     mkl_use_interleaved=true;
00093     svm_weight_epsilon=1e-5;
00094     svm_epsilon=1e-5;
00095     svm_tube_epsilon=1e-2;
00096     svm_nu=0.5;
00097     svm_use_shrinking = true ;
00098 
00099     svm_use_bias = true;
00100     svm_use_batch_computation = true ;
00101     svm_use_linadd = true ;
00102     svm_do_auc_maximization = false ;
00103 
00104     // KRR parameters
00105     krr_tau=1;
00106 
00107     solver_type=ST_AUTO;
00108 }
00109 
00110 CGUIClassifier::~CGUIClassifier()
00111 {
00112     SG_UNREF(classifier);
00113     SG_UNREF(constraint_generator);
00114 }
00115 
00116 bool CGUIClassifier::new_classifier(char* name, int32_t d, int32_t from_d)
00117 {
00118     if (strcmp(name,"LIBSVM_ONECLASS")==0)
00119     {
00120         SG_UNREF(classifier);
00121         classifier = new CLibSVMOneClass();
00122         SG_INFO("created SVMlibsvm object for oneclass\n");
00123     }
00124     else if (strcmp(name,"LIBSVM_MULTICLASS")==0)
00125     {
00126         SG_UNREF(classifier);
00127         classifier = new CMulticlassLibSVM();
00128         SG_INFO("created SVMlibsvm object for multiclass\n");
00129     }
00130     else if (strcmp(name,"LIBSVM_NUMULTICLASS")==0)
00131     {
00132         SG_UNREF(classifier);
00133         classifier= new CMulticlassLibSVM(LIBSVM_NU_SVC);
00134         SG_INFO("created SVMlibsvm object for multiclass\n") ;
00135     }
00136 #ifdef USE_SVMLIGHT
00137     else if (strcmp(name,"SCATTERSVM_NO_BIAS_SVMLIGHT")==0)
00138     {
00139         SG_UNREF(classifier);
00140         classifier= new CScatterSVM(NO_BIAS_SVMLIGHT);
00141         SG_INFO("created ScatterSVM NO BIAS SVMLIGHT object\n") ;
00142     }
00143 #endif //USE_SVMLIGHT
00144     else if (strcmp(name,"SCATTERSVM_NO_BIAS_LIBSVM")==0)
00145     {
00146         SG_UNREF(classifier);
00147         classifier= new CScatterSVM(NO_BIAS_LIBSVM);
00148         SG_INFO("created ScatterSVM NO BIAS LIBSVM object\n") ;
00149     }
00150     else if (strcmp(name,"SCATTERSVM_TESTRULE1")==0)
00151     {
00152         SG_UNREF(classifier);
00153         classifier= new CScatterSVM(TEST_RULE1);
00154         SG_INFO("created ScatterSVM TESTRULE1 object\n") ;
00155     }
00156     else if (strcmp(name,"SCATTERSVM_TESTRULE2")==0)
00157     {
00158         SG_UNREF(classifier);
00159         classifier= new CScatterSVM(TEST_RULE2);
00160         SG_INFO("created ScatterSVM TESTRULE2 object\n") ;
00161     }
00162     else if (strcmp(name,"LIBSVM_NU")==0)
00163     {
00164         SG_UNREF(classifier);
00165         classifier= new CLibSVM(LIBSVM_NU_SVC);
00166         SG_INFO("created SVMlibsvm object\n") ;
00167     }
00168     else if (strcmp(name,"LIBSVM")==0)
00169     {
00170         SG_UNREF(classifier);
00171         classifier= new CLibSVM();
00172         SG_INFO("created SVMlibsvm object\n") ;
00173     }
00174     else if (strcmp(name,"LARANK")==0)
00175     {
00176         SG_UNREF(classifier);
00177         classifier= new CLaRank();
00178         SG_INFO("created LaRank object\n") ;
00179     }
00180 #ifdef USE_SVMLIGHT
00181     else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
00182     {
00183         SG_UNREF(classifier);
00184         classifier= new CSVMLight();
00185         SG_INFO("created SVMLight object\n") ;
00186     }
00187     else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
00188     {
00189         SG_UNREF(classifier);
00190         classifier= new CSVMLightOneClass();
00191         SG_INFO("created SVMLightOneClass object\n") ;
00192     }
00193     else if (strcmp(name,"SVRLIGHT")==0)
00194     {
00195         SG_UNREF(classifier);
00196         classifier= new CSVRLight();
00197         SG_INFO("created SVRLight object\n") ;
00198     }
00199 #endif //USE_SVMLIGHT
00200     else if (strcmp(name,"GPBTSVM")==0)
00201     {
00202         SG_UNREF(classifier);
00203         classifier= new CGPBTSVM();
00204         SG_INFO("created GPBT-SVM object\n") ;
00205     }
00206     else if (strcmp(name,"MPDSVM")==0)
00207     {
00208         SG_UNREF(classifier);
00209         classifier= new CMPDSVM();
00210         SG_INFO("created MPD-SVM object\n") ;
00211     }
00212     else if (strcmp(name,"GNPPSVM")==0)
00213     {
00214         SG_UNREF(classifier);
00215         classifier= new CGNPPSVM();
00216         SG_INFO("created GNPP-SVM object\n") ;
00217     }
00218     else if (strcmp(name,"GMNPSVM")==0)
00219     {
00220         SG_UNREF(classifier);
00221         classifier= new CGMNPSVM();
00222         SG_INFO("created GMNP-SVM object\n") ;
00223     }
00224     else if (strcmp(name,"LIBSVR")==0)
00225     {
00226         SG_UNREF(classifier);
00227         classifier= new CLibSVR();
00228         SG_INFO("created SVRlibsvm object\n") ;
00229     }
00230 #ifdef HAVE_LAPACK
00231     else if (strcmp(name, "KERNELRIDGEREGRESSION")==0)
00232     {
00233         SG_UNREF(classifier);
00234         classifier=new CKernelRidgeRegression(krr_tau, ui->ui_kernel->get_kernel(),
00235             ui->ui_labels->get_train_labels());
00236         SG_INFO("created KernelRidgeRegression object %p\n", classifier);
00237     }
00238 #endif //HAVE_LAPACK
00239     else if (strcmp(name,"PERCEPTRON")==0)
00240     {
00241         SG_UNREF(classifier);
00242         classifier= new CPerceptron();
00243         SG_INFO("created Perceptron object\n") ;
00244     }
00245 #ifdef HAVE_LAPACK
00246     else if (strncmp(name,"LIBLINEAR",9)==0)
00247     {
00248         LIBLINEAR_SOLVER_TYPE st=L2R_LR;
00249 
00250         if (strcmp(name,"LIBLINEAR_L2R_LR")==0)
00251         {
00252             st=L2R_LR;
00253             SG_INFO("created LibLinear l2 regularized logistic regression object\n") ;
00254         }
00255         else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC_DUAL")==0)
00256         {
00257             st=L2R_L2LOSS_SVC_DUAL;
00258             SG_INFO("created LibLinear l2 regularized l2 loss SVM dual object\n") ;
00259         }
00260         else if (strcmp(name,"LIBLINEAR_L2R_L2LOSS_SVC")==0)
00261         {
00262             st=L2R_L2LOSS_SVC;
00263             SG_INFO("created LibLinear l2 regularized l2 loss SVM primal object\n") ;
00264         }
00265         else if (strcmp(name,"LIBLINEAR_L1R_L2LOSS_SVC")==0)
00266         {
00267             st=L1R_L2LOSS_SVC;
00268             SG_INFO("created LibLinear l1 regularized l2 loss SVM primal object\n") ;
00269         }
00270         else if (strcmp(name,"LIBLINEAR_L2R_L1LOSS_SVC_DUAL")==0)
00271         {
00272             st=L2R_L1LOSS_SVC_DUAL;
00273             SG_INFO("created LibLinear l2 regularized l1 loss dual SVM object\n") ;
00274         }
00275         else
00276             SG_ERROR("unknown liblinear type\n");
00277 
00278         SG_UNREF(classifier);
00279         classifier= new CLibLinear(st);
00280         ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00281         ((CLibLinear*) classifier)->set_epsilon(svm_epsilon);
00282         ((CLibLinear*) classifier)->set_bias_enabled(svm_use_bias);
00283     }
00284     else if (strcmp(name,"LDA")==0)
00285     {
00286         SG_UNREF(classifier);
00287         classifier= new CLDA();
00288         SG_INFO("created LDA object\n") ;
00289     }
00290 #endif //HAVE_LAPACK
00291 #ifdef USE_CPLEX
00292     else if (strcmp(name,"LPM")==0)
00293     {
00294         SG_UNREF(classifier);
00295         classifier= new CLPM();
00296         ((CLPM*) classifier)->set_C(svm_C1, svm_C2);
00297         ((CLPM*) classifier)->set_epsilon(svm_epsilon);
00298         ((CLPM*) classifier)->set_bias_enabled(svm_use_bias);
00299         ((CLPM*) classifier)->set_max_train_time(max_train_time);
00300         SG_INFO("created LPM object\n") ;
00301     }
00302     else if (strcmp(name,"LPBOOST")==0)
00303     {
00304         SG_UNREF(classifier);
00305         classifier= new CLPBoost();
00306         ((CLPBoost*) classifier)->set_C(svm_C1, svm_C2);
00307         ((CLPBoost*) classifier)->set_epsilon(svm_epsilon);
00308         ((CLPBoost*) classifier)->set_bias_enabled(svm_use_bias);
00309         ((CLPBoost*) classifier)->set_max_train_time(max_train_time);
00310         SG_INFO("created LPBoost object\n") ;
00311     }
00312     else if (strcmp(name,"SUBGRADIENTLPM")==0)
00313     {
00314         SG_UNREF(classifier);
00315         classifier= new CSubGradientLPM();
00316 
00317         ((CSubGradientLPM*) classifier)->set_bias_enabled(svm_use_bias);
00318         ((CSubGradientLPM*) classifier)->set_qpsize(svm_qpsize);
00319         ((CSubGradientLPM*) classifier)->set_qpsize_max(svm_max_qpsize);
00320         ((CSubGradientLPM*) classifier)->set_C(svm_C1, svm_C2);
00321         ((CSubGradientLPM*) classifier)->set_epsilon(svm_epsilon);
00322         ((CSubGradientLPM*) classifier)->set_max_train_time(max_train_time);
00323         SG_INFO("created Subgradient LPM object\n") ;
00324     }
00325 #endif //USE_CPLEX
00326     else if (strncmp(name,"KNN", strlen("KNN"))==0)
00327     {
00328         SG_UNREF(classifier);
00329         classifier= new CKNN();
00330         SG_INFO("created KNN object\n") ;
00331     }
00332     else if (strncmp(name,"KMEANS", strlen("KMEANS"))==0)
00333     {
00334         SG_UNREF(classifier);
00335         classifier= new CKMeans();
00336         SG_INFO("created KMeans object\n") ;
00337     }
00338     else if (strncmp(name,"HIERARCHICAL", strlen("HIERARCHICAL"))==0)
00339     {
00340         SG_UNREF(classifier);
00341         classifier= new CHierarchical();
00342         SG_INFO("created Hierarchical clustering object\n") ;
00343     }
00344     else if (strcmp(name,"SVMLIN")==0)
00345     {
00346         SG_UNREF(classifier);
00347         classifier= new CSVMLin();
00348         ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00349         ((CSVMLin*) classifier)->set_epsilon(svm_epsilon);
00350         ((CSVMLin*) classifier)->set_bias_enabled(svm_use_bias);
00351         SG_INFO("created SVMLin object\n") ;
00352     }
00353     else if (strcmp(name,"SUBGRADIENTSVM")==0)
00354     {
00355         SG_UNREF(classifier);
00356         classifier= new CSubGradientSVM();
00357 
00358         ((CSubGradientSVM*) classifier)->set_bias_enabled(svm_use_bias);
00359         ((CSubGradientSVM*) classifier)->set_qpsize(svm_qpsize);
00360         ((CSubGradientSVM*) classifier)->set_qpsize_max(svm_max_qpsize);
00361         ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00362         ((CSubGradientSVM*) classifier)->set_epsilon(svm_epsilon);
00363         ((CSubGradientSVM*) classifier)->set_max_train_time(max_train_time);
00364         SG_INFO("created Subgradient SVM object\n") ;
00365     }
00366     else if (strncmp(name,"WDSVMOCAS", strlen("WDSVMOCAS"))==0)
00367     {
00368         SG_UNREF(classifier);
00369         classifier= new CWDSVMOcas(SVM_OCAS);
00370 
00371         ((CWDSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00372         ((CWDSVMOcas*) classifier)->set_degree(d, from_d);
00373         ((CWDSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00374         ((CWDSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00375         ((CWDSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00376         SG_INFO("created Weighted Degree Kernel SVM Ocas(OCAS) object of order %d (from order:%d)\n", d, from_d) ;
00377     }
00378     else if (strcmp(name,"SVMOCAS")==0)
00379     {
00380         SG_UNREF(classifier);
00381         classifier= new CSVMOcas(SVM_OCAS);
00382 
00383         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00384         ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00385         ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00386         ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00387         SG_INFO("created SVM Ocas(OCAS) object\n") ;
00388     }
00389     else if (strcmp(name,"SVMSGD")==0)
00390     {
00391         SG_UNREF(classifier);
00392         classifier= new CSVMSGD(svm_C1);
00393         ((CSVMSGD*) classifier)->set_bias_enabled(svm_use_bias);
00394         SG_INFO("created SVM SGD object\n") ;
00395     }
00396     else if (strcmp(name,"SVMBMRM")==0 || (strcmp(name,"SVMPERF")==0))
00397     {
00398         SG_UNREF(classifier);
00399         classifier= new CSVMOcas(SVM_BMRM);
00400 
00401         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00402         ((CSVMOcas*) classifier)->set_epsilon(svm_epsilon);
00403         ((CSVMOcas*) classifier)->set_bufsize(svm_bufsize);
00404         ((CSVMOcas*) classifier)->set_bias_enabled(svm_use_bias);
00405         SG_INFO("created SVM Ocas(BMRM/PERF) object\n") ;
00406     }
00407     else if (strcmp(name,"MKL_CLASSIFICATION")==0)
00408     {
00409         SG_UNREF(classifier);
00410         classifier= new CMKLClassification();
00411     }
00412     else if (strcmp(name,"MKL_ONECLASS")==0)
00413     {
00414         SG_UNREF(classifier);
00415         classifier= new CMKLOneClass();
00416     }
00417     else if (strcmp(name,"MKL_MULTICLASS")==0)
00418     {
00419         SG_UNREF(classifier);
00420         classifier= new CMKLMulticlass();
00421     }
00422     else if (strcmp(name,"MKL_REGRESSION")==0)
00423     {
00424         SG_UNREF(classifier);
00425         classifier= new CMKLRegression();
00426     }
00427     else
00428     {
00429         SG_ERROR("Unknown classifier %s.\n", name);
00430         return false;
00431     }
00432     SG_REF(classifier);
00433 
00434     return (classifier!=NULL);
00435 }
00436 
00437 bool CGUIClassifier::train_mkl_multiclass()
00438 {
00439     CMKLMulticlass* mkl= (CMKLMulticlass*) classifier;
00440     if (!mkl)
00441         SG_ERROR("No MKL available.\n");
00442 
00443     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00444     if (!trainlabels)
00445         SG_ERROR("No trainlabels available.\n");
00446 
00447     CKernel* kernel=ui->ui_kernel->get_kernel();
00448     if (!kernel)
00449         SG_ERROR("No kernel available.\n");
00450 
00451     bool success=ui->ui_kernel->init_kernel("TRAIN");
00452 
00453     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00454         SG_ERROR("Kernel not initialized / no train features available.\n");
00455 
00456     int32_t num_vec=kernel->get_num_vec_lhs();
00457     if (trainlabels->get_num_labels() != num_vec)
00458         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00459 
00460     SG_INFO("Starting MC-MKL training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00461 
00462     mkl->set_mkl_epsilon(svm_weight_epsilon);
00463     mkl->set_mkl_norm(mkl_norm);
00464     //mkl->set_max_num_mkliters(-1);
00465     mkl->set_solver_type(solver_type);
00466     mkl->set_bias_enabled(svm_use_bias);
00467     mkl->set_epsilon(svm_epsilon);
00468     mkl->set_max_train_time(max_train_time);
00469     mkl->set_tube_epsilon(svm_tube_epsilon);
00470     mkl->set_nu(svm_nu);
00471     mkl->set_C(svm_C1);
00472     mkl->set_qpsize(svm_qpsize);
00473     mkl->set_shrinking_enabled(svm_use_shrinking);
00474     mkl->set_linadd_enabled(svm_use_linadd);
00475     mkl->set_batch_computation_enabled(svm_use_batch_computation);
00476 
00477     ((CKernelMulticlassMachine*) mkl)->set_labels(trainlabels);
00478     ((CKernelMulticlassMachine*) mkl)->set_kernel(kernel);
00479 
00480     return mkl->train();
00481 }
00482 
00483 bool CGUIClassifier::train_mkl()
00484 {
00485     CMKL* mkl= (CMKL*) classifier;
00486     if (!mkl)
00487         SG_ERROR("No SVM available.\n");
00488 
00489     bool oneclass=(mkl->get_classifier_type()==CT_LIBSVMONECLASS);
00490     CLabels* trainlabels=NULL;
00491     if(!oneclass)
00492         trainlabels=ui->ui_labels->get_train_labels();
00493     else
00494         SG_INFO("Training one class mkl.\n");
00495     if (!trainlabels && !oneclass)
00496         SG_ERROR("No trainlabels available.\n");
00497 
00498     CKernel* kernel=ui->ui_kernel->get_kernel();
00499     if (!kernel)
00500         SG_ERROR("No kernel available.\n");
00501 
00502     bool success=ui->ui_kernel->init_kernel("TRAIN");
00503     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00504         SG_ERROR("Kernel not initialized.\n");
00505 
00506     int32_t num_vec=kernel->get_num_vec_lhs();
00507     if (!oneclass && trainlabels->get_num_labels() != num_vec)
00508         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00509 
00510     SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00511 
00512     if (constraint_generator)
00513         mkl->set_constraint_generator(constraint_generator);
00514     mkl->set_solver_type(solver_type);
00515     mkl->set_bias_enabled(svm_use_bias);
00516     mkl->set_epsilon(svm_epsilon);
00517     mkl->set_max_train_time(max_train_time);
00518     mkl->set_tube_epsilon(svm_tube_epsilon);
00519     mkl->set_nu(svm_nu);
00520     mkl->set_C(svm_C1, svm_C2);
00521     mkl->set_qpsize(svm_qpsize);
00522     mkl->set_shrinking_enabled(svm_use_shrinking);
00523     mkl->set_linadd_enabled(svm_use_linadd);
00524     mkl->set_batch_computation_enabled(svm_use_batch_computation);
00525     mkl->set_mkl_epsilon(svm_weight_epsilon);
00526     mkl->set_mkl_norm(mkl_norm);
00527     mkl->set_elasticnet_lambda(ent_lambda);
00528     mkl->set_mkl_block_norm(mkl_block_norm);
00529     mkl->set_C_mkl(C_mkl);
00530     mkl->set_interleaved_optimization_enabled(mkl_use_interleaved);
00531 
00532     if (svm_do_auc_maximization)
00533     {
00534         CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00535         CLabels* auc_labels= auc_kernel->setup_auc_maximization(trainlabels);
00536         ((CKernelMachine*) mkl)->set_labels(auc_labels);
00537         ((CKernelMachine*) mkl)->set_kernel(auc_kernel);
00538         SG_UNREF(auc_labels);
00539     }
00540     else
00541     {
00542         if(!oneclass)
00543             ((CKernelMachine*) mkl)->set_labels(trainlabels);
00544         ((CKernelMachine*) mkl)->set_kernel(kernel);
00545     }
00546 
00547     bool result=mkl->train();
00548 
00549     return result;
00550 }
00551 
00552 bool CGUIClassifier::train_svm()
00553 {
00554     EMachineType type = classifier->get_classifier_type();
00555     
00556     if (!classifier)
00557         SG_ERROR("No SVM available.\n");
00558 
00559     bool oneclass=(type==CT_LIBSVMONECLASS);
00560     CLabels* trainlabels=NULL;
00561     if(!oneclass)
00562         trainlabels=ui->ui_labels->get_train_labels();
00563     else
00564         SG_INFO("Training one class svm.\n");
00565     if (!trainlabels && !oneclass)
00566         SG_ERROR("No trainlabels available.\n");
00567 
00568     CKernel* kernel=ui->ui_kernel->get_kernel();
00569     if (!kernel)
00570         SG_ERROR("No kernel available.\n");
00571 
00572     bool success=ui->ui_kernel->init_kernel("TRAIN");
00573 
00574     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00575         SG_ERROR("Kernel not initialized / no train features available.\n");
00576 
00577     int32_t num_vec=kernel->get_num_vec_lhs();
00578     if (!oneclass && trainlabels->get_num_labels() != num_vec)
00579         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00580 
00581     SG_INFO("Starting SVM training on %ld vectors using C1=%lf C2=%lf epsilon=%lf\n", num_vec, svm_C1, svm_C2, svm_epsilon);
00582     
00583     if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS)
00584     {
00585         CMulticlassSVM* svm = (CMulticlassSVM*)classifier;
00586         svm->set_solver_type(solver_type);
00587         svm->set_bias_enabled(svm_use_bias);
00588         svm->set_epsilon(svm_epsilon);
00589         svm->set_max_train_time(max_train_time);
00590         svm->set_tube_epsilon(svm_tube_epsilon);
00591         svm->set_nu(svm_nu);
00592         svm->set_C(svm_C1);
00593         svm->set_qpsize(svm_qpsize);
00594         svm->set_shrinking_enabled(svm_use_shrinking);
00595         svm->set_linadd_enabled(svm_use_linadd);
00596         svm->set_batch_computation_enabled(svm_use_batch_computation);
00597     }
00598     else
00599     {
00600         CSVM* svm = (CSVM*)classifier;
00601         svm->set_solver_type(solver_type);
00602         svm->set_bias_enabled(svm_use_bias);
00603         svm->set_epsilon(svm_epsilon);
00604         svm->set_max_train_time(max_train_time);
00605         svm->set_tube_epsilon(svm_tube_epsilon);
00606         svm->set_nu(svm_nu);
00607         svm->set_C(svm_C1, svm_C2);
00608         svm->set_qpsize(svm_qpsize);
00609         svm->set_shrinking_enabled(svm_use_shrinking);
00610         svm->set_linadd_enabled(svm_use_linadd);
00611         svm->set_batch_computation_enabled(svm_use_batch_computation);
00612     }
00613 
00614     if (type==CT_MKLMULTICLASS)
00615     {
00616         ((CMKLMulticlass *)classifier)->set_mkl_epsilon(svm_weight_epsilon);
00617     }
00618 
00619     if (svm_do_auc_maximization)
00620     {
00621         CAUCKernel* auc_kernel = new CAUCKernel(10, kernel);
00622         CLabels* auc_labels = auc_kernel->setup_auc_maximization(trainlabels);
00623         ((CKernelMachine*)classifier)->set_labels(auc_labels);
00624         ((CKernelMachine*)classifier)->set_kernel(auc_kernel);
00625         SG_UNREF(auc_labels);
00626     }
00627     else
00628     {
00629         if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS)
00630         {
00631             ((CKernelMulticlassMachine*)classifier)->set_labels(trainlabels);
00632             ((CKernelMulticlassMachine*)classifier)->set_kernel(kernel);
00633         }
00634         else 
00635         {
00636             if(!oneclass)
00637                 ((CKernelMachine*)classifier)->set_labels(trainlabels);
00638 
00639             ((CKernelMachine*)classifier)->set_kernel(kernel);
00640         }
00641     }
00642 
00643     bool result = classifier->train();
00644 
00645     return result;
00646 }
00647 
00648 bool CGUIClassifier::train_clustering(int32_t k, int32_t max_iter)
00649 {
00650     bool result=false;
00651     CDistance* distance=ui->ui_distance->get_distance();
00652 
00653     if (!distance)
00654         SG_ERROR("No distance available\n");
00655 
00656     if (!ui->ui_distance->init_distance("TRAIN"))
00657         SG_ERROR("Initializing distance with train features failed.\n");
00658 
00659     ((CDistanceMachine*) classifier)->set_distance(distance);
00660 
00661     EMachineType type=classifier->get_classifier_type();
00662     switch (type)
00663     {
00664         case CT_KMEANS:
00665         {
00666             ((CKMeans*) classifier)->set_k(k);
00667             ((CKMeans*) classifier)->set_max_iter(max_iter);
00668             result=((CKMeans*) classifier)->train();
00669             break;
00670         }
00671         case CT_HIERARCHICAL:
00672         {
00673             ((CHierarchical*) classifier)->set_merges(k);
00674             result=((CHierarchical*) classifier)->train();
00675             break;
00676         }
00677         default:
00678             SG_ERROR("Unknown clustering type %d\n", type);
00679     }
00680 
00681     return result;
00682 }
00683 
00684 bool CGUIClassifier::train_knn(int32_t k)
00685 {
00686     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00687     CDistance* distance=ui->ui_distance->get_distance();
00688 
00689     bool result=false;
00690 
00691     if (trainlabels)
00692     {
00693         if (distance)
00694         {
00695             if (!ui->ui_distance->init_distance("TRAIN"))
00696                 SG_ERROR("Initializing distance with train features failed.\n");
00697             ((CKNN*) classifier)->set_labels(trainlabels);
00698             ((CKNN*) classifier)->set_distance(distance);
00699             ((CKNN*) classifier)->set_k(k);
00700             result=((CKNN*) classifier)->train();
00701         }
00702         else
00703             SG_ERROR("No distance available.\n");
00704     }
00705     else
00706         SG_ERROR("No labels available\n");
00707 
00708     return result;
00709 }
00710 
00711 bool CGUIClassifier::train_krr()
00712 {
00713 #ifdef HAVE_LAPACK
00714     CKernelRidgeRegression* krr= (CKernelRidgeRegression*) classifier;
00715     if (!krr)
00716         SG_ERROR("No SVM available.\n");
00717 
00718     CLabels* trainlabels=NULL;
00719     trainlabels=ui->ui_labels->get_train_labels();
00720     if (!trainlabels)
00721         SG_ERROR("No trainlabels available.\n");
00722 
00723     CKernel* kernel=ui->ui_kernel->get_kernel();
00724     if (!kernel)
00725         SG_ERROR("No kernel available.\n");
00726 
00727     bool success=ui->ui_kernel->init_kernel("TRAIN");
00728 
00729     if (!success || !ui->ui_kernel->is_initialized() || !kernel->has_features())
00730         SG_ERROR("Kernel not initialized / no train features available.\n");
00731 
00732     int32_t num_vec=kernel->get_num_vec_lhs();
00733     if (trainlabels->get_num_labels() != num_vec)
00734         SG_ERROR("Number of train labels (%d) and training vectors (%d) differs!\n", trainlabels->get_num_labels(), num_vec);
00735 
00736 
00737     // Set training labels and kernel
00738     krr->set_labels(trainlabels);
00739     krr->set_kernel(kernel);
00740 
00741     bool result=krr->train();
00742     return result;
00743 #else
00744     return false;
00745 #endif
00746 }
00747 
00748 bool CGUIClassifier::train_linear(float64_t gamma)
00749 {
00750     ASSERT(classifier);
00751     EMachineType ctype = classifier->get_classifier_type();
00752     CFeatures* trainfeatures=ui->ui_features->get_train_features();
00753     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00754     bool result=false;
00755 
00756     if (!trainfeatures)
00757         SG_ERROR("No trainfeatures available.\n");
00758 
00759     if (!trainfeatures->has_property(FP_DOT))
00760         SG_ERROR("Trainfeatures not based on DotFeatures.\n");
00761 
00762     if (!trainlabels)
00763         SG_ERROR("No labels available\n");
00764 
00765     if (ctype==CT_PERCEPTRON)
00766     {
00767         ((CPerceptron*) classifier)->set_learn_rate(perceptron_learnrate);
00768         ((CPerceptron*) classifier)->set_max_iter(perceptron_maxiter);
00769     }
00770 
00771 #ifdef HAVE_LAPACK
00772     if (ctype==CT_LDA)
00773     {
00774         if (trainfeatures->get_feature_type()!=F_DREAL ||
00775                 trainfeatures->get_feature_class()!=C_DENSE)
00776         SG_ERROR("LDA requires train features of class SIMPLE type REAL.\n");
00777         ((CLDA*) classifier)->set_gamma(gamma);
00778     }
00779 #endif
00780 
00781     if (ctype==CT_SVMOCAS)
00782         ((CSVMOcas*) classifier)->set_C(svm_C1, svm_C2);
00783 #ifdef HAVE_LAPACK
00784     else if (ctype==CT_LIBLINEAR)
00785         ((CLibLinear*) classifier)->set_C(svm_C1, svm_C2);
00786 #endif
00787     else if (ctype==CT_SVMLIN)
00788         ((CSVMLin*) classifier)->set_C(svm_C1, svm_C2);
00789     else if (ctype==CT_SVMSGD)
00790         ((CSVMSGD*) classifier)->set_C(svm_C1, svm_C2);
00791     else if (ctype==CT_SUBGRADIENTSVM)
00792         ((CSubGradientSVM*) classifier)->set_C(svm_C1, svm_C2);
00793 
00794     else if (ctype==CT_LPM || ctype==CT_LPBOOST)
00795     {
00796         if (trainfeatures->get_feature_class()!=C_SPARSE ||
00797                 trainfeatures->get_feature_type()!=F_DREAL)
00798             SG_ERROR("LPM and LPBOOST require trainfeatures of class SPARSE type REAL.\n");
00799     }
00800 
00801     ((CLinearMachine*) classifier)->set_labels(trainlabels);
00802     ((CLinearMachine*) classifier)->set_features((CDenseFeatures<float64_t>*) trainfeatures);
00803     result=((CLinearMachine*) classifier)->train();
00804 
00805     return result;
00806 }
00807 
00808 bool CGUIClassifier::train_wdocas()
00809 {
00810     CFeatures* trainfeatures=ui->ui_features->get_train_features();
00811     CLabels* trainlabels=ui->ui_labels->get_train_labels();
00812 
00813     bool result=false;
00814 
00815     if (!trainfeatures)
00816         SG_ERROR("No trainfeatures available.\n");
00817 
00818     if (trainfeatures->get_feature_class()!=C_STRING ||
00819             trainfeatures->get_feature_type()!=F_BYTE )
00820         SG_ERROR("Trainfeatures are not of class STRING type BYTE.\n");
00821 
00822     if (!trainlabels)
00823         SG_ERROR("No labels available.\n");
00824 
00825     ((CWDSVMOcas*) classifier)->set_labels(trainlabels);
00826     ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) trainfeatures);
00827     result=((CWDSVMOcas*) classifier)->train();
00828 
00829     return result;
00830 }
00831 
00832 bool CGUIClassifier::load(char* filename, char* type)
00833 {
00834     bool result=false;
00835 
00836     if (new_classifier(type))
00837     {
00838         FILE* model_file=fopen(filename, "r");
00839         CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(model_file,'r');
00840 
00841         if (ascii_file)
00842         {
00843             if (classifier && classifier->load_serializable(ascii_file))
00844             {
00845                 SG_DEBUG("file successfully read.\n");
00846                 result=true;
00847             }
00848             else
00849                 SG_ERROR("SVM/Classifier creation/loading failed on file %s.\n", filename);
00850 
00851             delete ascii_file;
00852         }
00853         else
00854             SG_ERROR("Opening file %s failed.\n", filename);
00855 
00856         return result;
00857     }
00858     else
00859         SG_ERROR("Type %s of SVM/Classifier unknown.\n", type);
00860 
00861     return false;
00862 }
00863 
00864 bool CGUIClassifier::save(char* param)
00865 {
00866     bool result=false;
00867     param=SGIO::skip_spaces(param);
00868 
00869     if (classifier)
00870     {
00871         FILE* file=fopen(param, "w");
00872         CSerializableAsciiFile* ascii_file = new CSerializableAsciiFile(file,'w');
00873 
00874         if ((!ascii_file) || (!classifier->save_serializable(ascii_file)))
00875             printf("writing to file %s failed!\n", param);
00876         else
00877         {
00878             printf("successfully written classifier into \"%s\" !\n", param);
00879             result=true;
00880         }
00881 
00882         if (ascii_file)
00883             delete ascii_file;
00884     }
00885     else
00886         SG_ERROR("create classifier first\n");
00887 
00888     return result;
00889 }
00890 
00891 bool CGUIClassifier::set_perceptron_parameters(
00892     float64_t learnrate, int32_t maxiter)
00893 {
00894     if (learnrate<=0)
00895         perceptron_learnrate=0.01;
00896     else
00897         perceptron_learnrate=learnrate;
00898 
00899     if (maxiter<=0)
00900         perceptron_maxiter=1000;
00901     else
00902         perceptron_maxiter=maxiter;
00903     SG_INFO("Setting to perceptron parameters (learnrate %f and maxiter: %d\n", perceptron_learnrate, perceptron_maxiter);
00904 
00905     return true;
00906 }
00907 
00908 bool CGUIClassifier::set_svm_epsilon(float64_t epsilon)
00909 {
00910     if (epsilon<0)
00911         svm_epsilon=1e-4;
00912     else
00913         svm_epsilon=epsilon;
00914     SG_INFO("Set to svm_epsilon=%f.\n", svm_epsilon);
00915 
00916     return true;
00917 }
00918 
00919 bool CGUIClassifier::set_max_train_time(float64_t max)
00920 {
00921     if (max>0)
00922     {
00923         max_train_time=max;
00924         SG_INFO("Set to max_train_time=%f.\n", max_train_time);
00925     }
00926     else
00927         SG_INFO("Disabling max_train_time.\n");
00928 
00929     return true;
00930 }
00931 
00932 bool CGUIClassifier::set_svr_tube_epsilon(float64_t tube_epsilon)
00933 {
00934     if (!classifier)
00935         SG_ERROR("No regression method allocated\n");
00936 
00937     if (classifier->get_classifier_type() != CT_LIBSVR &&
00938             classifier->get_classifier_type() != CT_SVRLIGHT &&
00939             classifier->get_classifier_type() != CT_MKLREGRESSION )
00940     {
00941         SG_ERROR("Underlying method not capable of SV-regression\n");
00942     }
00943 
00944     if (tube_epsilon<0)
00945         svm_tube_epsilon=1e-2;
00946     svm_tube_epsilon=tube_epsilon;
00947 
00948     ((CSVM*) classifier)->set_tube_epsilon(svm_tube_epsilon);
00949     SG_INFO("Set to svr_tube_epsilon=%f.\n", svm_tube_epsilon);
00950 
00951     return true;
00952 }
00953 
00954 bool CGUIClassifier::set_svm_nu(float64_t nu)
00955 {
00956     if (nu<0 || nu>1)
00957         nu=0.5;
00958 
00959     svm_nu=nu;
00960     SG_INFO("Set to nu=%f.\n", svm_nu);
00961 
00962     return true;
00963 }
00964 
00965 bool CGUIClassifier::set_svm_mkl_parameters(
00966     float64_t weight_epsilon, float64_t C, float64_t norm)
00967 {
00968     if (weight_epsilon<0)
00969         weight_epsilon=1e-4;
00970     if (C<0)
00971         C=0;
00972     if (norm<0)
00973         SG_ERROR("MKL norm >= 0\n");
00974 
00975     svm_weight_epsilon=weight_epsilon;
00976     C_mkl=C;
00977     mkl_norm=norm;
00978 
00979     SG_INFO("Set to weight_epsilon=%f.\n", svm_weight_epsilon);
00980     SG_INFO("Set to C_mkl=%f.\n", C_mkl);
00981     SG_INFO("Set to mkl_norm=%f.\n", mkl_norm);
00982 
00983     return true;
00984 }
00985 
00986 bool CGUIClassifier::set_elasticnet_lambda(float64_t lambda)
00987 {
00988   if (lambda<0 || lambda>1)
00989     SG_ERROR("0 <= ent_lambda <= 1\n");
00990 
00991   ent_lambda = lambda;
00992   return true;
00993 }
00994 
00995 bool CGUIClassifier::set_mkl_block_norm(float64_t mkl_bnorm)
00996 {
00997   if (mkl_bnorm<1)
00998     SG_ERROR("1 <= mkl_block_norm <= inf\n");
00999 
01000   mkl_block_norm=mkl_bnorm;
01001   return true;
01002 }
01003 
01004 
01005 bool CGUIClassifier::set_svm_C(float64_t C1, float64_t C2)
01006 {
01007     if (C1<0)
01008         svm_C1=1.0;
01009     else
01010         svm_C1=C1;
01011 
01012     if (C2<0)
01013         svm_C2=svm_C1;
01014     else
01015         svm_C2=C2;
01016 
01017     SG_INFO("Set to C1=%f C2=%f.\n", svm_C1, svm_C2);
01018 
01019     return true;
01020 }
01021 
01022 bool CGUIClassifier::set_svm_qpsize(int32_t qpsize)
01023 {
01024     if (qpsize<2)
01025         svm_qpsize=41;
01026     else
01027         svm_qpsize=qpsize;
01028     SG_INFO("Set qpsize to svm_qpsize=%d.\n", svm_qpsize);
01029 
01030     return true;
01031 }
01032 
01033 bool CGUIClassifier::set_svm_max_qpsize(int32_t max_qpsize)
01034 {
01035     if (max_qpsize<50)
01036         svm_max_qpsize=50;
01037     else
01038         svm_max_qpsize=max_qpsize;
01039     SG_INFO("Set max qpsize to svm_max_qpsize=%d.\n", svm_max_qpsize);
01040 
01041     return true;
01042 }
01043 
01044 bool CGUIClassifier::set_svm_bufsize(int32_t bufsize)
01045 {
01046     if (svm_bufsize<0)
01047         svm_bufsize=3000;
01048     else
01049         svm_bufsize=bufsize;
01050     SG_INFO("Set bufsize to svm_bufsize=%d.\n", svm_bufsize);
01051 
01052     return true ;
01053 }
01054 
01055 bool CGUIClassifier::set_svm_shrinking_enabled(bool enabled)
01056 {
01057     svm_use_shrinking=enabled;
01058     if (svm_use_shrinking)
01059         SG_INFO("Enabling shrinking optimization.\n");
01060     else
01061         SG_INFO("Disabling shrinking optimization.\n");
01062 
01063     return true;
01064 }
01065 
01066 bool CGUIClassifier::set_svm_batch_computation_enabled(bool enabled)
01067 {
01068     svm_use_batch_computation=enabled;
01069     if (svm_use_batch_computation)
01070         SG_INFO("Enabling batch computation.\n");
01071     else
01072         SG_INFO("Disabling batch computation.\n");
01073 
01074     return true;
01075 }
01076 
01077 bool CGUIClassifier::set_svm_linadd_enabled(bool enabled)
01078 {
01079     svm_use_linadd=enabled;
01080     if (svm_use_linadd)
01081         SG_INFO("Enabling LINADD optimization.\n");
01082     else
01083         SG_INFO("Disabling LINADD optimization.\n");
01084 
01085     return true;
01086 }
01087 
01088 bool CGUIClassifier::set_svm_bias_enabled(bool enabled)
01089 {
01090     svm_use_bias=enabled;
01091     if (svm_use_bias)
01092         SG_INFO("Enabling svm bias.\n");
01093     else
01094         SG_INFO("Disabling svm bias.\n");
01095 
01096     return true;
01097 }
01098 
01099 bool CGUIClassifier::set_mkl_interleaved_enabled(bool enabled)
01100 {
01101     mkl_use_interleaved=enabled;
01102     if (mkl_use_interleaved)
01103         SG_INFO("Enabling mkl interleaved optimization.\n");
01104     else
01105         SG_INFO("Disabling mkl interleaved optimization.\n");
01106 
01107     return true;
01108 }
01109 
01110 bool CGUIClassifier::set_do_auc_maximization(bool do_auc)
01111 {
01112     svm_do_auc_maximization=do_auc;
01113 
01114     if (svm_do_auc_maximization)
01115         SG_INFO("Enabling AUC maximization.\n");
01116     else
01117         SG_INFO("Disabling AUC maximization.\n");
01118 
01119     return true;
01120 }
01121 
01122 
01123 CLabels* CGUIClassifier::classify()
01124 {
01125     ASSERT(classifier);
01126 
01127     switch (classifier->get_classifier_type())
01128     {
01129         case CT_LIGHT:
01130         case CT_LIGHTONECLASS:
01131         case CT_LIBSVM:
01132         case CT_SCATTERSVM:
01133         case CT_MPD:
01134         case CT_GPBT:
01135         case CT_CPLEXSVM:
01136         case CT_GMNPSVM:
01137         case CT_GNPPSVM:
01138         case CT_LIBSVR:
01139         case CT_LIBSVMMULTICLASS:
01140         case CT_LIBSVMONECLASS:
01141         case CT_SVRLIGHT:
01142         case CT_MKLCLASSIFICATION:
01143         case CT_MKLMULTICLASS:
01144         case CT_MKLREGRESSION:
01145         case CT_MKLONECLASS:
01146         case CT_KERNELRIDGEREGRESSION:
01147             return classify_kernelmachine();
01148         case CT_KNN:
01149             return classify_distancemachine();
01150         case CT_PERCEPTRON:
01151         case CT_LDA:
01152             return classify_linear();
01153         case CT_SVMLIN:
01154         case CT_SVMPERF:
01155         case CT_SUBGRADIENTSVM:
01156         case CT_SVMOCAS:
01157         case CT_SVMSGD:
01158         case CT_LPM:
01159         case CT_LPBOOST:
01160         case CT_SUBGRADIENTLPM:
01161         case CT_LIBLINEAR:
01162             return classify_linear();
01163         case CT_WDSVMOCAS:
01164             return classify_byte_linear();
01165         default:
01166             SG_ERROR("unknown classifier type\n");
01167             break;
01168     };
01169 
01170     return NULL;
01171 }
01172 
01173 CLabels* CGUIClassifier::classify_kernelmachine()
01174 {
01175     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01176     CFeatures* testfeatures=ui->ui_features->get_test_features();
01177 
01178     if (!classifier)
01179         SG_ERROR("No kernelmachine available.\n");
01180 
01181     bool success=true;
01182 
01183     if (ui->ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
01184     {
01185         if (ui->ui_kernel->get_kernel()->get_kernel_type()==K_COMBINED
01186                 && ( !trainfeatures || !testfeatures ))
01187         {
01188             SG_DEBUG("skipping initialisation of combined kernel "
01189                     "as train/test features are unavailable\n");
01190         }
01191         else
01192         {
01193             if (!trainfeatures)
01194                 SG_ERROR("No training features available.\n");
01195             if (!testfeatures)
01196                 SG_ERROR("No test features available.\n");
01197 
01198             success=ui->ui_kernel->init_kernel("TEST");
01199         }
01200     }
01201 
01202     if (!success || !ui->ui_kernel->is_initialized())
01203         SG_ERROR("Kernel not initialized.\n");
01204 
01205     EMachineType type = classifier->get_classifier_type();
01206     if (type==CT_LARANK || type==CT_GMNPSVM || type==CT_LIBSVMMULTICLASS ||
01207         type==CT_MKLMULTICLASS)
01208     {
01209         CKernelMulticlassMachine* kmcm = (CKernelMulticlassMachine*) classifier;
01210         kmcm->set_kernel(ui->ui_kernel->get_kernel());
01211     }
01212     else 
01213     {
01214         CKernelMachine* km=(CKernelMachine*) classifier;
01215         km->set_kernel(ui->ui_kernel->get_kernel());
01216         km->set_batch_computation_enabled(svm_use_batch_computation);
01217     }
01218 
01219     SG_INFO("Starting kernel machine testing.\n");
01220     return classifier->apply();
01221 }
01222 
01223 bool CGUIClassifier::get_trained_classifier(
01224     float64_t* &weights, int32_t &rows, int32_t &cols, float64_t*& bias,
01225     int32_t& brows, int32_t& bcols,
01226     int32_t idx) // which SVM for Multiclass
01227 {
01228     ASSERT(classifier);
01229 
01230     switch (classifier->get_classifier_type())
01231     {
01232         case CT_SCATTERSVM:
01233         case CT_GNPPSVM:
01234         case CT_LIBSVMMULTICLASS:
01235         case CT_LIGHT:
01236         case CT_LIGHTONECLASS:
01237         case CT_LIBSVM:
01238         case CT_MPD:
01239         case CT_GPBT:
01240         case CT_CPLEXSVM:
01241         case CT_GMNPSVM:
01242         case CT_LIBSVR:
01243         case CT_LIBSVMONECLASS:
01244         case CT_SVRLIGHT:
01245         case CT_MKLCLASSIFICATION:
01246         case CT_MKLREGRESSION:
01247         case CT_MKLONECLASS:
01248         case CT_MKLMULTICLASS:
01249         case CT_KERNELRIDGEREGRESSION:
01250             return get_svm(weights, rows, cols, bias, brows, bcols, idx);
01251             break;
01252         case CT_PERCEPTRON:
01253         case CT_LDA:
01254         case CT_LPM:
01255         case CT_LPBOOST:
01256         case CT_SUBGRADIENTLPM:
01257         case CT_SVMOCAS:
01258         case CT_SVMSGD:
01259         case CT_SVMLIN:
01260         case CT_SVMPERF:
01261         case CT_SUBGRADIENTSVM:
01262         case CT_LIBLINEAR:
01263             return get_linear(weights, rows, cols, bias, brows, bcols);
01264             break;
01265         case CT_KMEANS:
01266         case CT_HIERARCHICAL:
01267             return get_clustering(weights, rows, cols, bias, brows, bcols);
01268             break;
01269         case CT_KNN:
01270             SG_ERROR("not implemented");
01271             break;
01272         default:
01273             SG_ERROR("unknown classifier type\n");
01274             break;
01275     };
01276     return false;
01277 }
01278 
01279 
01280 int32_t CGUIClassifier::get_num_svms()
01281 {
01282     ASSERT(classifier);
01283     return ((CMulticlassSVM*) classifier)->get_num_machines();
01284 }
01285 
01286 bool CGUIClassifier::get_svm(
01287     float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01288     int32_t& brows, int32_t& bcols, int32_t idx)
01289 {
01290     CSVM* svm=(CSVM*) classifier;
01291 
01292     if (idx>-1) // should be MulticlassSVM
01293         svm=((CMulticlassSVM*) svm)->get_svm(idx);
01294 
01295     if (svm)
01296     {
01297         brows=1;
01298         bcols=1;
01299         bias=SG_MALLOC(float64_t, 1);
01300         *bias=svm->get_bias();
01301 
01302         rows=svm->get_num_support_vectors();
01303         cols=2;
01304         weights=SG_MALLOC(float64_t, rows*cols);
01305 
01306         for (int32_t i=0; i<rows; i++)
01307         {
01308             weights[i]=svm->get_alpha(i);
01309             weights[i+rows]=svm->get_support_vector(i);
01310         }
01311 
01312         return true;
01313     }
01314 
01315     return false;
01316 }
01317 
01318 bool CGUIClassifier::get_clustering(
01319     float64_t* &centers, int32_t& rows, int32_t& cols, float64_t*& radi,
01320     int32_t& brows, int32_t& bcols)
01321 {
01322     if (!classifier)
01323         return false;
01324 
01325     switch (classifier->get_classifier_type())
01326     {
01327         case CT_KMEANS:
01328         {
01329             CKMeans* clustering=(CKMeans*) classifier;
01330 
01331             bcols=1;
01332             SGVector<float64_t> r=clustering->get_radiuses();
01333             brows=r.vlen;
01334             radi=SG_MALLOC(float64_t, brows);
01335             memcpy(radi, r.vector, sizeof(float64_t)*brows);
01336 
01337             cols=1;
01338             SGMatrix<float64_t> c=clustering->get_cluster_centers();
01339             rows=c.num_rows;
01340             cols=c.num_cols;
01341             centers=SG_MALLOC(float64_t, rows*cols);
01342             memcpy(centers, c.matrix, sizeof(float64_t)*rows*cols);
01343             break;
01344         }
01345 
01346         case CT_HIERARCHICAL:
01347         {
01348             CHierarchical* clustering=(CHierarchical*) classifier;
01349 
01350             // radi == merge_distances, centers == pairs
01351             bcols=1;
01352             SGVector<float64_t> r=clustering->get_merge_distances();
01353             brows=r.vlen;
01354             radi=SG_MALLOC(float64_t, brows);
01355             memcpy(radi, r.vector, sizeof(float64_t)*brows);
01356 
01357             SGMatrix<int32_t> p=clustering->get_cluster_pairs();
01358             rows=p.num_rows;
01359             cols=p.num_cols;
01360             centers=SG_MALLOC(float64_t, rows*cols);
01361             for (int32_t i=0; i<rows*cols; i++)
01362                 centers[i]=(float64_t) p.matrix[i];
01363 
01364             break;
01365         }
01366 
01367         default:
01368             SG_ERROR("internal error - unknown clustering type\n");
01369     }
01370 
01371     return true;
01372 }
01373 
01374 bool CGUIClassifier::get_linear(
01375     float64_t* &weights, int32_t& rows, int32_t& cols, float64_t*& bias,
01376     int32_t& brows, int32_t& bcols)
01377 {
01378     CLinearMachine* linear=(CLinearMachine*) classifier;
01379 
01380     if (!linear)
01381         return false;
01382 
01383     bias=SG_MALLOC(float64_t, 1);
01384     *bias=linear->get_bias();
01385     brows=1;
01386     bcols=1;
01387 
01388     SGVector<float64_t> w=linear->get_w();
01389     cols=1;
01390     rows=w.vlen;
01391 
01392     weights= SG_MALLOC(float64_t, w.vlen);
01393     memcpy(weights, w.vector, sizeof(float64_t)*w.vlen);
01394 
01395     return true;
01396 }
01397 
01398 CLabels* CGUIClassifier::classify_distancemachine()
01399 {
01400     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01401     CFeatures* testfeatures=ui->ui_features->get_test_features();
01402 
01403     if (!classifier)
01404     {
01405         SG_ERROR("no kernelmachine available\n") ;
01406         return NULL;
01407     }
01408     if (!trainfeatures)
01409     {
01410         SG_ERROR("no training features available\n") ;
01411         return NULL;
01412     }
01413 
01414     if (!testfeatures)
01415     {
01416         SG_ERROR("no test features available\n") ;
01417         return NULL;
01418     }
01419 
01420     bool success=ui->ui_distance->init_distance("TEST");
01421 
01422     if (!success || !ui->ui_distance->is_initialized())
01423     {
01424         SG_ERROR("distance not initialized\n") ;
01425         return NULL;
01426     }
01427 
01428     ((CDistanceMachine*) classifier)->set_distance(
01429         ui->ui_distance->get_distance());
01430     SG_INFO("starting distance machine testing\n") ;
01431     return classifier->apply();
01432 }
01433 
01434 
01435 CLabels* CGUIClassifier::classify_linear()
01436 {
01437     CFeatures* testfeatures=ui->ui_features->get_test_features();
01438 
01439     if (!classifier)
01440     {
01441         SG_ERROR("no classifier available\n") ;
01442         return NULL;
01443     }
01444     if (!testfeatures)
01445     {
01446         SG_ERROR("no test features available\n") ;
01447         return NULL;
01448     }
01449     if (!(testfeatures->has_property(FP_DOT)))
01450     {
01451         SG_ERROR("testfeatures not based on DotFeatures\n") ;
01452         return NULL;
01453     }
01454 
01455     ((CLinearMachine*) classifier)->set_features((CDotFeatures*) testfeatures);
01456     SG_INFO("starting linear classifier testing\n") ;
01457     return classifier->apply();
01458 }
01459 
01460 CLabels* CGUIClassifier::classify_byte_linear()
01461 {
01462     CFeatures* testfeatures=ui->ui_features->get_test_features();
01463 
01464     if (!classifier)
01465     {
01466         SG_ERROR("no svm available\n") ;
01467         return NULL;
01468     }
01469     if (!testfeatures)
01470     {
01471         SG_ERROR("no test features available\n") ;
01472         return NULL;
01473     }
01474     if (testfeatures->get_feature_class() != C_STRING ||
01475             testfeatures->get_feature_type() != F_BYTE )
01476     {
01477         SG_ERROR("testfeatures not of class STRING type BYTE\n") ;
01478         return NULL;
01479     }
01480 
01481     ((CWDSVMOcas*) classifier)->set_features((CStringFeatures<uint8_t>*) testfeatures);
01482     SG_INFO("starting linear classifier testing\n") ;
01483     return classifier->apply();
01484 }
01485 
01486 bool CGUIClassifier::classify_example(int32_t idx, float64_t &result)
01487 {
01488     CFeatures* trainfeatures=ui->ui_features->get_train_features();
01489     CFeatures* testfeatures=ui->ui_features->get_test_features();
01490 
01491     if (!classifier)
01492     {
01493         SG_ERROR("no svm available\n") ;
01494         return false;
01495     }
01496 
01497     if (!ui->ui_kernel->is_initialized())
01498     {
01499         SG_ERROR("kernel not initialized\n") ;
01500         return false;
01501     }
01502 
01503     if (!ui->ui_kernel->get_kernel() ||
01504             !ui->ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
01505     {
01506         if (!trainfeatures)
01507         {
01508             SG_ERROR("no training features available\n") ;
01509             return false;
01510         }
01511 
01512         if (!testfeatures)
01513         {
01514             SG_ERROR("no test features available\n") ;
01515             return false;
01516         }
01517     }
01518 
01519     ((CKernelMachine*) classifier)->set_kernel(
01520         ui->ui_kernel->get_kernel());
01521 
01522     result=((CKernelMachine*)classifier)->apply_one(idx);
01523     return true ;
01524 }
01525 
01526 
01527 bool CGUIClassifier::set_krr_tau(float64_t tau)
01528 {
01529 #ifdef HAVE_LAPACK
01530     krr_tau=tau;
01531     ((CKernelRidgeRegression*) classifier)->set_tau(krr_tau);
01532     SG_INFO("Set to krr_tau=%f.\n", krr_tau);
01533 
01534     return true;
01535 #else
01536     return false;
01537 #endif
01538 }
01539 
01540 bool CGUIClassifier::set_solver(char* solver)
01541 {
01542     ESolverType s=ST_AUTO;
01543 
01544     if (strncmp(solver,"NEWTON", 6)==0)
01545     {
01546         SG_INFO("Using NEWTON solver.\n");
01547         s=ST_NEWTON;
01548     }
01549     else if (strncmp(solver,"DIRECT", 6)==0)
01550     {
01551         SG_INFO("Using DIRECT solver\n");
01552         s=ST_DIRECT;
01553     }
01554     else if (strncmp(solver,"BLOCK_NORM", 9)==0)
01555     {
01556         SG_INFO("Using BLOCK_NORM solver\n");
01557         s=ST_BLOCK_NORM;
01558     }
01559     else if (strncmp(solver,"ELASTICNET", 10)==0)
01560     {
01561         SG_INFO("Using ELASTICNET solver\n");
01562         s=ST_ELASTICNET;
01563     }
01564     else if (strncmp(solver,"AUTO", 4)==0)
01565     {
01566         SG_INFO("Automagically determining solver.\n");
01567         s=ST_AUTO;
01568     }
01569 #ifdef USE_CPLEX
01570     else if (strncmp(solver, "CPLEX", 5)==0)
01571     {
01572         SG_INFO("USING CPLEX METHOD selected\n");
01573         s=ST_CPLEX;
01574     }
01575 #endif
01576 #ifdef USE_GLPK
01577     else if (strncmp(solver,"GLPK", 4)==0)
01578     {
01579         SG_INFO("Using GLPK solver\n");
01580         s=ST_GLPK;
01581     }
01582 #endif
01583     else
01584         SG_ERROR("Unknown solver type, %s (not compiled in?)\n", solver);
01585 
01586 
01587     solver_type=s;
01588     return true;
01589 }
01590 
01591 bool CGUIClassifier::set_constraint_generator(char* name)
01592 {
01593     if (strcmp(name,"LIBSVM_ONECLASS")==0)
01594     {
01595         SG_UNREF(constraint_generator);
01596         constraint_generator = new CLibSVMOneClass();
01597         SG_INFO("created SVMlibsvm object for oneclass\n");
01598     }
01599     else if (strcmp(name,"LIBSVM_NU")==0)
01600     {
01601         SG_UNREF(constraint_generator);
01602         constraint_generator= new CLibSVM(LIBSVM_NU_SVC);
01603         SG_INFO("created SVMlibsvm object\n") ;
01604     }
01605     else if (strcmp(name,"LIBSVM")==0)
01606     {
01607         SG_UNREF(constraint_generator);
01608         constraint_generator= new CLibSVM();
01609         SG_INFO("created SVMlibsvm object\n") ;
01610     }
01611 #ifdef USE_SVMLIGHT
01612     else if ((strcmp(name,"LIGHT")==0) || (strcmp(name,"SVMLIGHT")==0))
01613     {
01614         SG_UNREF(constraint_generator);
01615         constraint_generator= new CSVMLight();
01616         SG_INFO("created SVMLight object\n") ;
01617     }
01618     else if (strcmp(name,"SVMLIGHT_ONECLASS")==0)
01619     {
01620         SG_UNREF(constraint_generator);
01621         constraint_generator= new CSVMLightOneClass();
01622         SG_INFO("created SVMLightOneClass object\n") ;
01623     }
01624     else if (strcmp(name,"SVRLIGHT")==0)
01625     {
01626         SG_UNREF(constraint_generator);
01627         constraint_generator= new CSVRLight();
01628         SG_INFO("created SVRLight object\n") ;
01629     }
01630 #endif //USE_SVMLIGHT
01631     else if (strcmp(name,"GPBTSVM")==0)
01632     {
01633         SG_UNREF(constraint_generator);
01634         constraint_generator= new CGPBTSVM();
01635         SG_INFO("created GPBT-SVM object\n") ;
01636     }
01637     else if (strcmp(name,"MPDSVM")==0)
01638     {
01639         SG_UNREF(constraint_generator);
01640         constraint_generator= new CMPDSVM();
01641         SG_INFO("created MPD-SVM object\n") ;
01642     }
01643     else if (strcmp(name,"GNPPSVM")==0)
01644     {
01645         SG_UNREF(constraint_generator);
01646         constraint_generator= new CGNPPSVM();
01647         SG_INFO("created GNPP-SVM object\n") ;
01648     }
01649     else if (strcmp(name,"LIBSVR")==0)
01650     {
01651         SG_UNREF(constraint_generator);
01652         constraint_generator= new CLibSVR();
01653         SG_INFO("created SVRlibsvm object\n") ;
01654     }
01655     else
01656     {
01657         SG_ERROR("Unknown SV-classifier %s.\n", name);
01658         return false;
01659     }
01660     SG_REF(constraint_generator);
01661 
01662     return (constraint_generator!=NULL);
01663 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation