SGInterface.h

Go to the documentation of this file.
00001 #ifndef __SGINTERFACE__H_
00002 #define __SGINTERFACE__H_
00003 
00004 #include <shogun/lib/memory.h>
00005 #include <shogun/lib/config.h>
00006 #include <shogun/lib/common.h>
00007 #include <shogun/base/SGObject.h>
00008 #include <shogun/features/Features.h>
00009 #include <shogun/features/StringFeatures.h>
00010 #include <shogun/features/SparseFeatures.h>
00011 #include <shogun/features/AttributeFeatures.h>
00012 #include <shogun/kernel/Kernel.h>
00013 
00014 #include <shogun/ui/GUIClassifier.h>
00015 #include <shogun/ui/GUIDistance.h>
00016 #include <shogun/ui/GUIFeatures.h>
00017 #include <shogun/ui/GUIHMM.h>
00018 #include <shogun/ui/GUIKernel.h>
00019 #include <shogun/ui/GUILabels.h>
00020 #include <shogun/ui/GUIMath.h>
00021 #include <shogun/ui/GUIPluginEstimate.h>
00022 #include <shogun/ui/GUIPreprocessor.h>
00023 #include <shogun/ui/GUITime.h>
00024 #include <shogun/ui/GUIStructure.h>
00025 #include <shogun/ui/GUISignals.h>
00026 #include <shogun/ui/GUIConverter.h>
00027 
00028 namespace shogun
00029 {
/** Type tags for values exchanged through the interface; this is the return
 * type of CSGInterface::get_argument_type().
 *
 * NOTE(review): the per-group descriptions below are inferred from the
 * enumerator names only - confirm against the implementation.
 */
00034 enum IFType
00035 {
      /** no type determined */
00037     UNDEFINED,
00038 
      /** scalar values and a plain string */
00040     SCALAR_INT,
00041     SCALAR_REAL,
00042     SCALAR_BOOL,
00043     STANDARD_STRING,
00044 
      /** vectors, one tag per element type */
00046     VECTOR_BOOL,
00047     VECTOR_BYTE,
00048     VECTOR_CHAR,
00049     VECTOR_INT,
00050     VECTOR_REAL,
00051     VECTOR_SHORTREAL,
00052     VECTOR_SHORT,
00053     VECTOR_WORD,
00054 
      /** dense matrices, one tag per element type */
00056     DENSE_INT,
00057     DENSE_REAL,
00058     DENSE_SHORTREAL,
00059     DENSE_SHORT,
00060     DENSE_WORD,
00061 
      /** n-dimensional arrays, one tag per element type */
00063     NDARRAY_BYTE,
00064     NDARRAY_CHAR,
00065     NDARRAY_INT,
00066     NDARRAY_REAL,
00067     NDARRAY_SHORTREAL,
00068     NDARRAY_SHORT,
00069     NDARRAY_WORD,
00070 
      /** sparse matrices, one tag per element type */
00072     SPARSE_BYTE,
00073     SPARSE_CHAR,
00074     SPARSE_INT,
00075     SPARSE_REAL,
00076     SPARSE_SHORT,
00077     SPARSE_SHORTREAL,
00078     SPARSE_WORD,
00079 
      /** lists of strings, one tag per character type */
00081     STRING_BYTE,
00082     STRING_CHAR,
00083     STRING_INT,
00084     STRING_SHORT,
00085     STRING_WORD,
00086 
      /** attribute structure (see get_attribute_struct / set_attribute_struct) */
00088     ATTR_STRUCT
00089 };
00090 
/** Selects which objective or duality-gap quantity is requested; this is the
 * parameter type of CSGInterface::do_compute_objective().
 *
 * NOTE(review): enumerator descriptions are taken from the names - confirm.
 */
00092 enum E_WHICH_OBJ
00093 {
      /** SVM primal objective */
00095     SVM_PRIMAL,
      /** SVM dual objective */
00097     SVM_DUAL,
      /** MKL primal objective */
00099     MKL_PRIMAL,
      /** MKL dual objective */
00101     MKL_DUAL,
      /** MKL relative duality gap */
00103     MKL_RELATIVE_DUALITY_GAP,
      /** MKL absolute duality gap */
00105     MKL_ABSOLUTE_DUALITY_GAP
00106 };
00107 
00109 class CSGInterface : public CSGObject
00110 {
00111     public:
00115         CSGInterface(bool print_copyrights=true);
00116 
00118         ~CSGInterface();
00119 
00121         virtual void reset();
00122 
00124         void translate_arg(CSGInterface* source, CSGInterface* target);
00125 
00126         /* commands */
00128         bool cmd_load_features();
00130         bool cmd_save_features();
00132         bool cmd_clean_features();
00134         bool cmd_get_features();
00136         bool cmd_add_features();
00138         bool cmd_add_multiple_features();
00140         bool cmd_add_dotfeatures();
00142         bool cmd_set_features();
00144         bool cmd_set_reference_features();
00146         bool cmd_del_last_features();
00148         bool cmd_convert();
00150         bool cmd_reshape();
00152         bool cmd_load_labels();
00154         bool cmd_set_labels();
00156         bool cmd_get_labels();
00157 
00159         bool cmd_set_kernel_normalization();
00161         bool cmd_set_kernel();
00163         bool cmd_add_kernel();
00165         bool cmd_del_last_kernel();
00167         bool cmd_init_kernel();
00169         bool cmd_clean_kernel();
00171         bool cmd_save_kernel();
00173         bool cmd_load_kernel_init();
00175         bool cmd_save_kernel_init();
00177         bool cmd_get_kernel_matrix();
00179         bool cmd_set_WD_position_weights();
00181         bool cmd_get_subkernel_weights();
00183         bool cmd_set_subkernel_weights();
00185         bool cmd_set_subkernel_weights_combined();
00187         bool cmd_get_dotfeature_weights_combined();
00189         bool cmd_set_dotfeature_weights_combined();
00191         bool cmd_set_last_subkernel_weights();
00193         bool cmd_get_WD_position_weights();
00195         bool cmd_get_last_subkernel_weights();
00197         bool cmd_compute_by_subkernels();
00199         bool cmd_init_kernel_optimization();
00201         bool cmd_get_kernel_optimization();
00203         bool cmd_delete_kernel_optimization();
00205         bool cmd_use_diagonal_speedup();
00207         bool cmd_set_kernel_optimization_type();
00209         bool cmd_set_solver();
00211         bool cmd_set_constraint_generator();
00213         bool cmd_set_prior_probs();
00215         bool cmd_set_prior_probs_from_labels();
00216 #ifdef USE_SVMLIGHT
00217 
00218         bool cmd_resize_kernel_cache();
00219 #endif //USE_SVMLIGHT
00220 
00221 
00223         bool cmd_set_distance();
00225         bool cmd_init_distance();
00227         bool cmd_get_distance_matrix();
00228 
00230         bool cmd_get_SPEC_consensus();
00232         bool cmd_get_SPEC_scoring();
00234         bool cmd_get_WD_consensus();
00236         bool cmd_compute_POIM_WD();
00238         bool cmd_get_WD_scoring();
00239 
00241         bool cmd_new_classifier();
00243         bool cmd_load_classifier();
00245         bool cmd_save_classifier();
00247         bool cmd_get_svm();
00249         bool cmd_get_num_svms();
00251         bool cmd_set_svm();
00253         bool cmd_set_linear_classifier();
00255         bool cmd_classify();
00257         bool cmd_classify_example();
00259         bool cmd_get_classifier();
00261         bool cmd_get_svm_objective();
00263         bool cmd_compute_svm_primal_objective();
00265         bool cmd_compute_svm_dual_objective();
00267         bool cmd_compute_mkl_dual_objective();
00269         bool cmd_compute_relative_mkl_duality_gap();
00271         bool cmd_compute_absolute_mkl_duality_gap();
00273         bool cmd_train_classifier();
00275         bool cmd_do_auc_maximization();
00277         bool cmd_set_perceptron_parameters();
00279         bool cmd_set_svm_qpsize();
00281         bool cmd_set_svm_max_qpsize();
00283         bool cmd_set_svm_bufsize();
00285         bool cmd_set_svm_C();
00287         bool cmd_set_svm_epsilon();
00289         bool cmd_set_svr_tube_epsilon();
00291         bool cmd_set_svm_nu();
00293         bool cmd_set_svm_mkl_parameters();
00295         bool cmd_set_elasticnet_lambda();
00297         bool cmd_set_mkl_block_norm();
00299         bool cmd_set_max_train_time();
00301         bool cmd_set_svm_mkl_enabled();
00303         bool cmd_set_svm_shrinking_enabled();
00305         bool cmd_set_svm_batch_computation_enabled();
00307         bool cmd_set_svm_linadd_enabled();
00309         bool cmd_set_svm_bias_enabled();
00311         bool cmd_set_mkl_interleaved_enabled();
00313         bool cmd_set_krr_tau();
00314 
00316         bool cmd_add_preproc();
00318         bool cmd_del_preproc();
00320         bool cmd_attach_preproc();
00322         bool cmd_clean_preproc();
00323 
00325         bool cmd_set_converter();
00327         bool cmd_embed();
00328 
00330         bool cmd_new_hmm();
00332         bool cmd_load_hmm();
00334         bool cmd_save_hmm();
00336         bool cmd_hmm_classify();
00338         bool cmd_hmm_classify_example();
00340         bool cmd_one_class_linear_hmm_classify();
00342         bool cmd_one_class_hmm_classify();
00344         bool cmd_one_class_hmm_classify_example();
00346         bool cmd_output_hmm();
00348         bool cmd_output_hmm_defined();
00350         bool cmd_hmm_likelihood();
00352         bool cmd_likelihood();
00354         bool cmd_save_likelihood();
00356         bool cmd_get_viterbi_path();
00358         bool cmd_viterbi_train_defined();
00360         bool cmd_viterbi_train();
00362         bool cmd_baum_welch_train();
00364         bool cmd_baum_welch_train_defined();
00366         bool cmd_baum_welch_trans_train();
00368         bool cmd_linear_train();
00370         bool cmd_save_path();
00372         bool cmd_append_hmm();
00374         bool cmd_append_model();
00376         bool cmd_set_hmm();
00378         bool cmd_set_hmm_as();
00380         bool cmd_get_hmm();
00382         bool cmd_set_chop();
00384         bool cmd_set_pseudo();
00386         bool cmd_load_definitions();
00388         bool cmd_convergence_criteria();
00390         bool cmd_normalize();
00392         bool cmd_add_states();
00394         bool cmd_permutation_entropy();
00396         bool cmd_relative_entropy();
00398         bool cmd_entropy();
00400         bool cmd_new_plugin_estimator();
00402         bool cmd_train_estimator();
00404         bool cmd_plugin_estimate_classify_example();
00406         bool cmd_plugin_estimate_classify();
00408         bool cmd_set_plugin_estimate();
00410         bool cmd_get_plugin_estimate();
00412         bool cmd_best_path();
00414         bool cmd_best_path_2struct();
00420         bool cmd_set_plif_struct();
00427         bool cmd_get_plif_struct();
00431         bool cmd_precompute_subkernels();
              /** Inline stub: performs no work and always returns false. */
00435         bool cmd_signals_set_model() { return false; };
00439         bool cmd_signals_set_positions();
00443         bool cmd_signals_set_labels();
00447         bool cmd_signals_set_split();
00451         bool cmd_signals_set_train_mask();
00455         bool cmd_signals_add_feature();
00459         bool cmd_signals_add_kernel();
00463         bool cmd_signals_run();
00476         bool cmd_precompute_content_svms();
00480         bool cmd_get_lin_feat();
00484         bool cmd_set_lin_feat();
00485 
00489         bool cmd_init_dyn_prog();
00490 
00494         bool cmd_clean_up_dyn_prog();
00495 
00499         bool cmd_init_intron_list();
00500 
00502         bool cmd_long_transition_settings();
00503 
00512         bool cmd_precompute_tiling_features();
00524         bool cmd_set_model();
00529         bool cmd_set_feature_matrix_sparse();
00534         bool cmd_set_feature_matrix();
00536         bool cmd_best_path_trans();
00538         bool cmd_best_path_trans_deriv();
00540         bool cmd_best_path_no_b();
00542         bool cmd_best_path_no_b_trans();
00543 
00545         bool cmd_crc();
00547         bool cmd_system();
00549         bool cmd_exit();
00551         bool cmd_exec();
00553         bool cmd_set_output();
00555         bool cmd_set_threshold();
00557         bool cmd_init_random();
00559         bool cmd_set_num_threads();
00561         bool cmd_translate_string();
00563         bool cmd_clear();
00565         bool cmd_tic();
00567         bool cmd_toc();
00569         bool cmd_echo();
00571         bool cmd_print();
00573         bool cmd_loglevel();
00575         bool cmd_progress();
00577         bool cmd_syntax_highlight();
00579         bool cmd_get_version();
00581         bool cmd_help();
00583         bool cmd_whos();
00585         bool cmd_send_command();
00587         virtual bool cmd_run_python();
00589         virtual bool cmd_run_octave();
00591         virtual bool cmd_run_r();
00593         virtual bool cmd_pr_loqo();
00594 
00597 
00598         virtual IFType get_argument_type()=0;
00599 
00601         virtual int32_t get_int()=0;
00603         virtual float64_t get_real()=0;
00605         virtual bool get_bool()=0;
00606 
00610         virtual char* get_string(int32_t& len)=0;
00611 
00616         virtual void get_vector(bool*& vector, int32_t& len);
00621         virtual void get_vector(uint8_t*& vector, int32_t& len)=0;
00626         virtual void get_vector(char*& vector, int32_t& len)=0;
00631         virtual void get_vector(int32_t*& vector, int32_t& len)=0;
00636         virtual void get_vector(float64_t*& vector, int32_t& len)=0;
00641         virtual void get_vector(float32_t*& vector, int32_t& len)=0;
00646         virtual void get_vector(int16_t*& vector, int32_t& len)=0;
00651         virtual void get_vector(uint16_t*& vector, int32_t& len)=0;
00652 
00658         virtual void get_matrix(
00659             uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00665         virtual void get_matrix(
00666             char*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00672         virtual void get_matrix(
00673             int32_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00679         virtual void get_matrix(
00680             float32_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00686         virtual void get_matrix(
00687             float64_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00693         virtual void get_matrix(
00694             int16_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00700         virtual void get_matrix(
00701             uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00702 
00708         virtual void get_ndarray(
00709             uint8_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00715         virtual void get_ndarray(
00716             char*& array, int32_t*& dims, int32_t& num_dims)=0;
00722         virtual void get_ndarray(
00723             int32_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00729         virtual void get_ndarray(
00730             float32_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00736         virtual void get_ndarray(
00737             float64_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00743         virtual void get_ndarray(
00744             int16_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00750         virtual void get_ndarray(
00751             uint16_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00752 
00758         virtual void get_sparse_matrix(
00759             SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00760 
00761         /*  future versions might support types other than float64_t
00762 
00763         virtual void get_sparse_matrix(SGSparseVector<uint8_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00764         virtual void get_sparse_matrix(SGSparseVector<char>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00765         virtual void get_sparse_matrix(SGSparseVector<int32_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00766         virtual void get_sparse_matrix(SGSparseVector<float32_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00767         virtual void get_sparse_matrix(SGSparseVector<int16_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00768         virtual void get_sparse_matrix(SGSparseVector<uint16_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0; */
00769 
00775         virtual void get_string_list(
00776             SGString<uint8_t>*& strings, int32_t& num_str,
00777             int32_t& max_string_len)=0;
00783         virtual void get_string_list(
00784             SGString<char>*& strings, int32_t& num_str,
00785             int32_t& max_string_len)=0;
00791         virtual void get_string_list(
00792             SGString<int32_t>*& strings, int32_t& num_str,
00793             int32_t& max_string_len)=0;
00799         virtual void get_string_list(
00800             SGString<int16_t>*& strings, int32_t& num_str,
00801             int32_t& max_string_len)=0;
00807         virtual void get_string_list(
00808             SGString<uint16_t>*& strings, int32_t& num_str,
00809             int32_t& max_string_len)=0;
00810 
00814         virtual void get_attribute_struct(
00815             const CDynamicArray<T_ATTRIBUTE>* &attrs)=0;
00816 
00817         // set functions - to pass data from shogun to the target interface
00821         virtual bool create_return_values(int32_t num_val)=0;
00822 
00826         virtual void set_int(int32_t scalar)=0;
00830         virtual void set_real(float64_t scalar)=0;
00834         virtual void set_bool(bool scalar)=0;
00835 
00840         virtual void set_vector(const bool* vector, int32_t len);
00845         virtual void set_vector(const uint8_t* vector, int32_t len)=0;
00850         virtual void set_vector(const char* vector, int32_t len)=0;
00855         virtual void set_vector(const int32_t* vector, int32_t len)=0;
00860         virtual void set_vector(const float32_t* vector, int32_t len)=0;
00865         virtual void set_vector(const float64_t* vector, int32_t len)=0;
00870         virtual void set_vector(const int16_t* vector, int32_t len)=0;
00875         virtual void set_vector(const uint16_t* vector, int32_t len)=0;
00876 
00882         virtual void set_matrix(
00883             const uint8_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00889         virtual void set_matrix(
00890             const char* matrix, int32_t num_feat, int32_t num_vec)=0;
00896         virtual void set_matrix(
00897             const int32_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00903         virtual void set_matrix(
00904             const float32_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00910         virtual void set_matrix(
00911             const float64_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00917         virtual void set_matrix(
00918             const int16_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00924         virtual void set_matrix(
00925             const uint16_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00926 
00933         virtual void set_sparse_matrix(
00934             const SGSparseVector<float64_t>* matrix, int32_t num_feat,
00935             int32_t num_vec, int64_t nnz)=0;
00936 
00937         /*  future versions might support types other than float64_t
00938 
00939         virtual void set_sparse_matrix(const SGSparseVector<uint8_t>* matrix, int32_t num_feat, int32_t num_vec)=0;
00940         virtual void set_sparse_matrix(const SGSparseVector<char>* matrix, int32_t num_feat, int32_t num_vec)=0;
00941         virtual void set_sparse_matrix(const SGSparseVector<int32_t>* matrix, int32_t num_feat, int32_t num_vec)=0;
00942         virtual void set_sparse_matrix(const SGSparseVector<float32_t>* matrix, int32_t num_feat, int32_t num_vec)=0;
00943         virtual void set_sparse_matrix(const SGSparseVector<int16_t>* matrix, int32_t num_feat, int32_t num_vec)=0;
00944         virtual void set_sparse_matrix(const SGSparseVector<uint16_t>* matrix, int32_t num_feat, int32_t num_vec)=0; */
00945 
00950         virtual void set_string_list(
00951             const SGString<uint8_t>* strings, int32_t num_str)=0;
00956         virtual void set_string_list(
00957             const SGString<char>* strings, int32_t num_str)=0;
00962         virtual void set_string_list(
00963             const SGString<int32_t>* strings, int32_t num_str)=0;
00968         virtual void set_string_list(
00969             const SGString<int16_t>* strings, int32_t num_str)=0;
00974         virtual void set_string_list(
00975             const SGString<uint16_t>* strings, int32_t num_str)=0;
00976 
00980         virtual void set_attribute_struct(
00981             const CDynamicArray<T_ATTRIBUTE>* attrs)=0;
00982 
00984         bool handle();
00985 
00987         void print_prompt();
00988 
              /** @return the current value of m_nlhs
               * (presumably the number of left-hand-side, i.e. output, arguments - TODO confirm) */
00990         int32_t get_nlhs() { return m_nlhs; }
00991 
              /** @return the current value of m_nrhs
               * (presumably the number of right-hand-side, i.e. input, arguments - TODO confirm) */
00993         int32_t get_nrhs() { return m_nrhs; }
00994 
00995 
00996         // ui lib
00998         CGUIClassifier* ui_classifier;
01000         CGUIDistance* ui_distance;
01002         CGUIFeatures* ui_features;
01004         CGUIHMM* ui_hmm;
01006         CGUIKernel* ui_kernel;
01008         CGUILabels* ui_labels;
01010         CGUIMath* ui_math;
01012         CGUIPluginEstimate* ui_pluginestimate;
01014         CGUIPreprocessor* ui_preproc;
01016         CGUITime* ui_time;
01018         CGUIStructure* ui_structure;
01019         //CGUISignals* ui_signals;
01021         CGUIConverter* ui_converter;
01022 
01023     protected:
01031         static bool strmatch(const char* str, const char* cmd, int32_t len=-1)
01032         {
01033             if (len==-1)
01034             {
01035                 len=strlen(cmd);
01036                 if (strlen(str)!=(size_t) len) // match exact length
01037                     return false;
01038             }
01039 
01040             return (strncmp(str, cmd, len)==0);
01041         }
01042 
01047         static bool strendswith(const char* str, const char* cmd)
01048         {
01049             size_t idx=strlen(str);
01050             size_t len=strlen(cmd);
01051 
01052             if (strlen(str) < len)
01053                 return false;
01054 
01055             str=&str[idx-len];
01056 
01057             return (strncmp(str, cmd, len)==0);
01058         }
              /** Fetch the command string via get_string().
               *
               * Asserts that m_rhs_counter is still 0 and reports an error
               * through SG_SERROR when m_nrhs is not positive.
               *
               * NOTE(review): SG_SERROR is defined elsewhere; if it does not
               * throw or abort, execution continues into get_string() even with
               * no input arguments - confirm.
               *
               * @param len passed through to get_string(), which receives it by
               *            reference (presumably set to the string length - TODO confirm)
               * @return the pointer returned by get_string(len)
               */
01060         char* get_command(int32_t &len)
01061         {
                  // the command is expected before any other argument is consumed
01062             ASSERT(m_rhs_counter==0);
01063             if (m_nrhs<=0)
01064                 SG_SERROR("No input arguments supplied.\n");
01065 
01066             return get_string(len);
01067         }
01068     private:
01070         bool do_compute_objective(E_WHICH_OBJ obj);
01072         bool do_hmm_classify(bool linear=false, bool one_class=false);
01074         bool do_hmm_classify_example(bool one_class=false);
01076         bool do_set_features(bool add=false, bool check_dot=false, int32_t repetitions=1);
01077 
01079         void convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong);
01081         void obtain_from_single_string(CFeatures* features);
01083         bool obtain_from_position_list(CFeatures* features);
01085         bool obtain_by_sliding_window(CFeatures* features);
01087         CKernel* create_kernel();
01088 
01090         CFeatures* create_custom_string_features(CStringFeatures<uint8_t>* f);
01091 
01092         CFeatures* create_custom_real_features(CDenseFeatures<float64_t>* orig_feat);
01094         char* get_str_from_str_or_direct(int32_t& len);
01095         int32_t get_int_from_int_or_str();
01096         float64_t get_real_from_real_or_str();
01097         bool get_bool_from_bool_or_str();
01098         void get_vector_from_int_vector_or_str(
01099             int32_t*& vector, int32_t& len);
01100         void get_vector_from_real_vector_or_str(
01101             float64_t*& vector, int32_t& len);
01102         int32_t get_vector_len_from_str(int32_t expected_len=0);
01103         char* get_str_from_str(int32_t& len);
01104         int32_t get_num_args_in_str();
01105 
01108         char* get_line(FILE* infile=stdin, bool show_prompt=true);
01109 
01110     protected:
01112         int32_t m_lhs_counter;
01114         int32_t m_rhs_counter;
01116         int32_t m_nlhs;
01118         int32_t m_nrhs;
01119 
01120         // related to cmd_exec and cmd_echo
01122         FILE* file_out;
01124         char input[10000];
01126         bool echo;
01127 
01129         char* m_legacy_strptr;
01130 };
01131 
01133 typedef bool (CSGInterface::*CSGInterfacePtr)();
01134 
01135 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01136 
01137 typedef struct {
01139     const char* command;
01141     CSGInterfacePtr method;
01143     const char* usage_prefix;
01145     const char* usage_suffix;
01146 } CSGInterfaceMethod;
01147 }
01148 #endif
01149 
01150 #endif // __SGINTERFACE__H_
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation