00001 #ifndef __SGINTERFACE__H_
00002 #define __SGINTERFACE__H_
00003
00004 #include <shogun/lib/memory.h>
00005 #include <shogun/lib/config.h>
00006 #include <shogun/lib/common.h>
00007 #include <shogun/base/SGObject.h>
00008 #include <shogun/features/Features.h>
00009 #include <shogun/features/StringFeatures.h>
00010 #include <shogun/features/SparseFeatures.h>
00011 #include <shogun/features/AttributeFeatures.h>
00012 #include <shogun/kernel/Kernel.h>
00013
00014 #include <shogun/ui/GUIClassifier.h>
00015 #include <shogun/ui/GUIDistance.h>
00016 #include <shogun/ui/GUIFeatures.h>
00017 #include <shogun/ui/GUIHMM.h>
00018 #include <shogun/ui/GUIKernel.h>
00019 #include <shogun/ui/GUILabels.h>
00020 #include <shogun/ui/GUIMath.h>
00021 #include <shogun/ui/GUIPluginEstimate.h>
00022 #include <shogun/ui/GUIPreprocessor.h>
00023 #include <shogun/ui/GUITime.h>
00024 #include <shogun/ui/GUIStructure.h>
00025 #include <shogun/ui/GUISignals.h>
00026 #include <shogun/ui/GUIConverter.h>
00027
00028 namespace shogun
00029 {
/** Type tags for arguments exchanged through the interface.
 *
 * CSGInterface::get_argument_type() returns one of these values.
 * No enumerator has an explicit value, so they are numbered consecutively
 * starting at UNDEFINED == 0; keep the order stable if the numeric values
 * are relied upon anywhere.
 */
enum IFType
{
	/** no (known) type */
	UNDEFINED,

	/* scalars and a plain string */
	SCALAR_INT,
	SCALAR_REAL,
	SCALAR_BOOL,
	STANDARD_STRING,

	/* vectors */
	VECTOR_BOOL,
	VECTOR_BYTE,
	VECTOR_CHAR,
	VECTOR_INT,
	VECTOR_REAL,
	VECTOR_SHORTREAL,
	VECTOR_SHORT,
	VECTOR_WORD,

	/* dense matrices */
	DENSE_INT,
	DENSE_REAL,
	DENSE_SHORTREAL,
	DENSE_SHORT,
	DENSE_WORD,

	/* n-dimensional arrays */
	NDARRAY_BYTE,
	NDARRAY_CHAR,
	NDARRAY_INT,
	NDARRAY_REAL,
	NDARRAY_SHORTREAL,
	NDARRAY_SHORT,
	NDARRAY_WORD,

	/* sparse matrices (note: SHORT precedes SHORTREAL in this group only) */
	SPARSE_BYTE,
	SPARSE_CHAR,
	SPARSE_INT,
	SPARSE_REAL,
	SPARSE_SHORT,
	SPARSE_SHORTREAL,
	SPARSE_WORD,

	/* string lists */
	STRING_BYTE,
	STRING_CHAR,
	STRING_INT,
	STRING_SHORT,
	STRING_WORD,

	/** attribute structure */
	ATTR_STRUCT
};
00090
/** Selects which objective value is requested.
 *
 * Passed to CSGInterface::do_compute_objective(). Enumerators have no
 * explicit values and therefore count up from SVM_PRIMAL == 0.
 */
enum E_WHICH_OBJ
{
	/** SVM primal objective */
	SVM_PRIMAL,
	/** SVM dual objective */
	SVM_DUAL,
	/** MKL primal objective */
	MKL_PRIMAL,
	/** MKL dual objective */
	MKL_DUAL,
	/** relative MKL duality gap */
	MKL_RELATIVE_DUALITY_GAP,
	/** absolute MKL duality gap */
	MKL_ABSOLUTE_DUALITY_GAP
};
00107
00109 class CSGInterface : public CSGObject
00110 {
00111 public:
00115 CSGInterface(bool print_copyrights=true);
00116
00118 ~CSGInterface();
00119
00121 virtual void reset();
00122
00124 void translate_arg(CSGInterface* source, CSGInterface* target);
00125
00126
/* Command handlers dealing with features and labels.
 *
 * Every handler takes no arguments and returns bool, matching the
 * CSGInterfacePtr member-function-pointer type declared in this header.
 * The return value presumably signals success -- confirm in the
 * implementation file, which is not visible from here.
 */
bool cmd_load_features();
bool cmd_save_features();
bool cmd_clean_features();
bool cmd_get_features();
bool cmd_add_features();
bool cmd_add_multiple_features();
bool cmd_add_dotfeatures();
bool cmd_set_features();
bool cmd_set_reference_features();
bool cmd_del_last_features();
bool cmd_convert();
bool cmd_reshape();
bool cmd_load_labels();
bool cmd_set_labels();
bool cmd_get_labels();
00157
/* Command handlers dealing with kernels, sub-kernel weights and kernel
 * optimization, followed by a few solver / prior-probability setters.
 * All share the `bool ()` handler signature.
 */
bool cmd_set_kernel_normalization();
bool cmd_set_kernel();
bool cmd_add_kernel();
bool cmd_del_last_kernel();
bool cmd_init_kernel();
bool cmd_clean_kernel();
bool cmd_save_kernel();
bool cmd_load_kernel_init();
bool cmd_save_kernel_init();
bool cmd_get_kernel_matrix();
bool cmd_set_WD_position_weights();
bool cmd_get_subkernel_weights();
bool cmd_set_subkernel_weights();
bool cmd_set_subkernel_weights_combined();
bool cmd_get_dotfeature_weights_combined();
bool cmd_set_dotfeature_weights_combined();
bool cmd_set_last_subkernel_weights();
bool cmd_get_WD_position_weights();
bool cmd_get_last_subkernel_weights();
bool cmd_compute_by_subkernels();
bool cmd_init_kernel_optimization();
bool cmd_get_kernel_optimization();
bool cmd_delete_kernel_optimization();
bool cmd_use_diagonal_speedup();
bool cmd_set_kernel_optimization_type();
bool cmd_set_solver();
bool cmd_set_constraint_generator();
bool cmd_set_prior_probs();
bool cmd_set_prior_probs_from_labels();
#ifdef USE_SVMLIGHT
/* only declared when the library is built with USE_SVMLIGHT */
bool cmd_resize_kernel_cache();
#endif //USE_SVMLIGHT
00220
00221
/* Command handlers for distances. */
bool cmd_set_distance();
bool cmd_init_distance();
bool cmd_get_distance_matrix();

/* Command handlers for consensus / scoring / POIM computations
 * (SPEC and WD presumably refer to the spectrum and weighted-degree
 * kernels -- confirm in the implementation).
 */
bool cmd_get_SPEC_consensus();
bool cmd_get_SPEC_scoring();
bool cmd_get_WD_consensus();
bool cmd_compute_POIM_WD();
bool cmd_get_WD_scoring();
00239
/* Command handlers for classifiers: creation, (de)serialization, access,
 * classification, objective computation, training and parameter setters.
 * All share the `bool ()` handler signature.
 */
bool cmd_new_classifier();
bool cmd_load_classifier();
bool cmd_save_classifier();
bool cmd_get_svm();
bool cmd_get_num_svms();
bool cmd_set_svm();
bool cmd_set_linear_classifier();
bool cmd_classify();
bool cmd_classify_example();
bool cmd_get_classifier();
bool cmd_get_svm_objective();
bool cmd_compute_svm_primal_objective();
bool cmd_compute_svm_dual_objective();
bool cmd_compute_mkl_dual_objective();
bool cmd_compute_relative_mkl_duality_gap();
bool cmd_compute_absolute_mkl_duality_gap();
bool cmd_train_classifier();
bool cmd_do_auc_maximization();
bool cmd_set_perceptron_parameters();
bool cmd_set_svm_qpsize();
bool cmd_set_svm_max_qpsize();
bool cmd_set_svm_bufsize();
bool cmd_set_svm_C();
bool cmd_set_svm_epsilon();
bool cmd_set_svr_tube_epsilon();
bool cmd_set_svm_nu();
bool cmd_set_svm_mkl_parameters();
bool cmd_set_elasticnet_lambda();
bool cmd_set_mkl_block_norm();
bool cmd_set_max_train_time();
bool cmd_set_svm_mkl_enabled();
bool cmd_set_svm_shrinking_enabled();
bool cmd_set_svm_batch_computation_enabled();
bool cmd_set_svm_linadd_enabled();
bool cmd_set_svm_bias_enabled();
bool cmd_set_mkl_interleaved_enabled();
bool cmd_set_krr_tau();
00314
/* Command handlers for preprocessors. */
bool cmd_add_preproc();
bool cmd_del_preproc();
bool cmd_attach_preproc();
bool cmd_clean_preproc();

/* Command handlers for converters. */
bool cmd_set_converter();
bool cmd_embed();
00328
/* Command handlers for hidden Markov models: creation, (de)serialization,
 * classification, likelihoods, training and model manipulation.
 */
bool cmd_new_hmm();
bool cmd_load_hmm();
bool cmd_save_hmm();
bool cmd_hmm_classify();
bool cmd_hmm_classify_example();
bool cmd_one_class_linear_hmm_classify();
bool cmd_one_class_hmm_classify();
bool cmd_one_class_hmm_classify_example();
bool cmd_output_hmm();
bool cmd_output_hmm_defined();
bool cmd_hmm_likelihood();
bool cmd_likelihood();
bool cmd_save_likelihood();
bool cmd_get_viterbi_path();
bool cmd_viterbi_train_defined();
bool cmd_viterbi_train();
bool cmd_baum_welch_train();
bool cmd_baum_welch_train_defined();
bool cmd_baum_welch_trans_train();
bool cmd_linear_train();
bool cmd_save_path();
bool cmd_append_hmm();
bool cmd_append_model();
bool cmd_set_hmm();
bool cmd_set_hmm_as();
bool cmd_get_hmm();
bool cmd_set_chop();
bool cmd_set_pseudo();
bool cmd_load_definitions();
bool cmd_convergence_criteria();
bool cmd_normalize();
bool cmd_add_states();
bool cmd_permutation_entropy();
bool cmd_relative_entropy();
bool cmd_entropy();

/* Command handlers for the plugin estimator. */
bool cmd_new_plugin_estimator();
bool cmd_train_estimator();
bool cmd_plugin_estimate_classify_example();
bool cmd_plugin_estimate_classify();
bool cmd_set_plugin_estimate();
bool cmd_get_plugin_estimate();

/* Best-path command handlers. */
bool cmd_best_path();
bool cmd_best_path_2struct();
/* Command handlers for structured prediction (PLiFs, signals, dynamic
 * programming, best-path variants). All share the `bool ()` signature.
 */
bool cmd_set_plif_struct();
bool cmd_get_plif_struct();
bool cmd_precompute_subkernels();
/** Inline stub: does nothing and always returns false. */
bool cmd_signals_set_model() { return false; }
bool cmd_signals_set_positions();
bool cmd_signals_set_labels();
bool cmd_signals_set_split();
bool cmd_signals_set_train_mask();
bool cmd_signals_add_feature();
bool cmd_signals_add_kernel();
bool cmd_signals_run();
bool cmd_precompute_content_svms();
bool cmd_get_lin_feat();
bool cmd_set_lin_feat();

bool cmd_init_dyn_prog();

bool cmd_clean_up_dyn_prog();

bool cmd_init_intron_list();

bool cmd_long_transition_settings();

bool cmd_precompute_tiling_features();
bool cmd_set_model();
bool cmd_set_feature_matrix_sparse();
bool cmd_set_feature_matrix();
bool cmd_best_path_trans();
bool cmd_best_path_trans_deriv();
bool cmd_best_path_no_b();
bool cmd_best_path_no_b_trans();
00543
00545 bool cmd_crc();
00547 bool cmd_system();
00549 bool cmd_exit();
00551 bool cmd_exec();
00553 bool cmd_set_output();
00555 bool cmd_set_threshold();
00557 bool cmd_init_random();
00559 bool cmd_set_num_threads();
00561 bool cmd_translate_string();
00563 bool cmd_clear();
00565 bool cmd_tic();
00567 bool cmd_toc();
00569 bool cmd_echo();
00571 bool cmd_print();
00573 bool cmd_loglevel();
00575 bool cmd_progress();
00577 bool cmd_syntax_highlight();
00579 bool cmd_get_version();
00581 bool cmd_help();
00583 bool cmd_whos();
00585 bool cmd_send_command();
00587 virtual bool cmd_run_python();
00589 virtual bool cmd_run_octave();
00591 virtual bool cmd_run_r();
00593 virtual bool cmd_pr_loqo();
00594
00597
00598 virtual IFType get_argument_type()=0;
00599
00601 virtual int32_t get_int()=0;
00603 virtual float64_t get_real()=0;
00605 virtual bool get_bool()=0;
00606
00610 virtual char* get_string(int32_t& len)=0;
00611
00616 virtual void get_vector(bool*& vector, int32_t& len);
00621 virtual void get_vector(uint8_t*& vector, int32_t& len)=0;
00626 virtual void get_vector(char*& vector, int32_t& len)=0;
00631 virtual void get_vector(int32_t*& vector, int32_t& len)=0;
00636 virtual void get_vector(float64_t*& vector, int32_t& len)=0;
00641 virtual void get_vector(float32_t*& vector, int32_t& len)=0;
00646 virtual void get_vector(int16_t*& vector, int32_t& len)=0;
00651 virtual void get_vector(uint16_t*& vector, int32_t& len)=0;
00652
00658 virtual void get_matrix(
00659 uint8_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00665 virtual void get_matrix(
00666 char*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00672 virtual void get_matrix(
00673 int32_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00679 virtual void get_matrix(
00680 float32_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00686 virtual void get_matrix(
00687 float64_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00693 virtual void get_matrix(
00694 int16_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00700 virtual void get_matrix(
00701 uint16_t*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00702
00708 virtual void get_ndarray(
00709 uint8_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00715 virtual void get_ndarray(
00716 char*& array, int32_t*& dims, int32_t& num_dims)=0;
00722 virtual void get_ndarray(
00723 int32_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00729 virtual void get_ndarray(
00730 float32_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00736 virtual void get_ndarray(
00737 float64_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00743 virtual void get_ndarray(
00744 int16_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00750 virtual void get_ndarray(
00751 uint16_t*& array, int32_t*& dims, int32_t& num_dims)=0;
00752
00758 virtual void get_sparse_matrix(
00759 SGSparseVector<float64_t>*& matrix, int32_t& num_feat, int32_t& num_vec)=0;
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769
00775 virtual void get_string_list(
00776 SGString<uint8_t>*& strings, int32_t& num_str,
00777 int32_t& max_string_len)=0;
00783 virtual void get_string_list(
00784 SGString<char>*& strings, int32_t& num_str,
00785 int32_t& max_string_len)=0;
00791 virtual void get_string_list(
00792 SGString<int32_t>*& strings, int32_t& num_str,
00793 int32_t& max_string_len)=0;
00799 virtual void get_string_list(
00800 SGString<int16_t>*& strings, int32_t& num_str,
00801 int32_t& max_string_len)=0;
00807 virtual void get_string_list(
00808 SGString<uint16_t>*& strings, int32_t& num_str,
00809 int32_t& max_string_len)=0;
00810
00814 virtual void get_attribute_struct(
00815 const CDynamicArray<T_ATTRIBUTE>* &attrs)=0;
00816
00817
00821 virtual bool create_return_values(int32_t num_val)=0;
00822
00826 virtual void set_int(int32_t scalar)=0;
00830 virtual void set_real(float64_t scalar)=0;
00834 virtual void set_bool(bool scalar)=0;
00835
00840 virtual void set_vector(const bool* vector, int32_t len);
00845 virtual void set_vector(const uint8_t* vector, int32_t len)=0;
00850 virtual void set_vector(const char* vector, int32_t len)=0;
00855 virtual void set_vector(const int32_t* vector, int32_t len)=0;
00860 virtual void set_vector(const float32_t* vector, int32_t len)=0;
00865 virtual void set_vector(const float64_t* vector, int32_t len)=0;
00870 virtual void set_vector(const int16_t* vector, int32_t len)=0;
00875 virtual void set_vector(const uint16_t* vector, int32_t len)=0;
00876
00882 virtual void set_matrix(
00883 const uint8_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00889 virtual void set_matrix(
00890 const char* matrix, int32_t num_feat, int32_t num_vec)=0;
00896 virtual void set_matrix(
00897 const int32_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00903 virtual void set_matrix(
00904 const float32_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00910 virtual void set_matrix(
00911 const float64_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00917 virtual void set_matrix(
00918 const int16_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00924 virtual void set_matrix(
00925 const uint16_t* matrix, int32_t num_feat, int32_t num_vec)=0;
00926
00933 virtual void set_sparse_matrix(
00934 const SGSparseVector<float64_t>* matrix, int32_t num_feat,
00935 int32_t num_vec, int64_t nnz)=0;
00936
00937
00938
00939
00940
00941
00942
00943
00944
00945
00950 virtual void set_string_list(
00951 const SGString<uint8_t>* strings, int32_t num_str)=0;
00956 virtual void set_string_list(
00957 const SGString<char>* strings, int32_t num_str)=0;
00962 virtual void set_string_list(
00963 const SGString<int32_t>* strings, int32_t num_str)=0;
00968 virtual void set_string_list(
00969 const SGString<int16_t>* strings, int32_t num_str)=0;
00974 virtual void set_string_list(
00975 const SGString<uint16_t>* strings, int32_t num_str)=0;
00976
00980 virtual void set_attribute_struct(
00981 const CDynamicArray<T_ATTRIBUTE>* attrs)=0;
00982
00984 bool handle();
00985
00987 void print_prompt();
00988
00990 int32_t get_nlhs() { return m_nlhs; }
00991
00993 int32_t get_nrhs() { return m_nrhs; }
00994
00995
00996
00998 CGUIClassifier* ui_classifier;
01000 CGUIDistance* ui_distance;
01002 CGUIFeatures* ui_features;
01004 CGUIHMM* ui_hmm;
01006 CGUIKernel* ui_kernel;
01008 CGUILabels* ui_labels;
01010 CGUIMath* ui_math;
01012 CGUIPluginEstimate* ui_pluginestimate;
01014 CGUIPreprocessor* ui_preproc;
01016 CGUITime* ui_time;
01018 CGUIStructure* ui_structure;
01019
01021 CGUIConverter* ui_converter;
01022
01023 protected:
/** Compare a string against a command name.
 *
 * With the default (or any negative) len the two strings must be equal in
 * full. With len >= 0 only the first len characters are compared, so
 * len == 0 always matches.
 *
 * @param str string to test; NULL never matches
 * @param cmd command to compare against; NULL never matches
 * @param len number of leading characters to compare, or negative for an
 *        exact, full-length comparison
 * @return true if the strings match as described above
 */
static bool strmatch(const char* str, const char* cmd, int32_t len=-1)
{
	// strlen()/strncmp() on NULL is undefined behaviour; treat as mismatch
	if (!str || !cmd)
		return false;

	if (len<0)
	{
		// keep the length in size_t: no narrowing to int32_t and no
		// negative value ever reaches strncmp()
		const size_t cmd_len=strlen(cmd);
		return strlen(str)==cmd_len && strncmp(str, cmd, cmd_len)==0;
	}

	return strncmp(str, cmd, (size_t) len)==0;
}
01042
/** Test whether a string ends with a given suffix.
 *
 * An empty suffix matches every (non-NULL) string.
 *
 * @param str string to test; NULL never matches
 * @param cmd suffix to look for; NULL never matches
 * @return true if the last strlen(cmd) characters of str equal cmd
 */
static bool strendswith(const char* str, const char* cmd)
{
	// strlen() on NULL is undefined behaviour; treat as mismatch
	if (!str || !cmd)
		return false;

	// each length is computed exactly once
	const size_t str_len=strlen(str);
	const size_t cmd_len=strlen(cmd);

	// a suffix longer than the string cannot match (and the subtraction
	// below would wrap around)
	if (str_len<cmd_len)
		return false;

	return strncmp(str+(str_len-cmd_len), cmd, cmd_len)==0;
}
01060 char* get_command(int32_t &len)
01061 {
01062 ASSERT(m_rhs_counter==0);
01063 if (m_nrhs<=0)
01064 SG_SERROR("No input arguments supplied.\n");
01065
01066 return get_string(len);
01067 }
01068 private:
01070 bool do_compute_objective(E_WHICH_OBJ obj);
01072 bool do_hmm_classify(bool linear=false, bool one_class=false);
01074 bool do_hmm_classify_example(bool one_class=false);
01076 bool do_set_features(bool add=false, bool check_dot=false, int32_t repetitions=1);
01077
01079 void convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong);
01081 void obtain_from_single_string(CFeatures* features);
01083 bool obtain_from_position_list(CFeatures* features);
01085 bool obtain_by_sliding_window(CFeatures* features);
01087 CKernel* create_kernel();
01088
01090 CFeatures* create_custom_string_features(CStringFeatures<uint8_t>* f);
01091
01092 CFeatures* create_custom_real_features(CDenseFeatures<float64_t>* orig_feat);
01094 char* get_str_from_str_or_direct(int32_t& len);
01095 int32_t get_int_from_int_or_str();
01096 float64_t get_real_from_real_or_str();
01097 bool get_bool_from_bool_or_str();
01098 void get_vector_from_int_vector_or_str(
01099 int32_t*& vector, int32_t& len);
01100 void get_vector_from_real_vector_or_str(
01101 float64_t*& vector, int32_t& len);
01102 int32_t get_vector_len_from_str(int32_t expected_len=0);
01103 char* get_str_from_str(int32_t& len);
01104 int32_t get_num_args_in_str();
01105
01108 char* get_line(FILE* infile=stdin, bool show_prompt=true);
01109
01110 protected:
/** counter for left-hand-side values */
int32_t m_lhs_counter;
/** counter for right-hand-side arguments; get_command() asserts it is 0 */
int32_t m_rhs_counter;
/** value returned by get_nlhs() */
int32_t m_nlhs;
/** value returned by get_nrhs(); get_command() requires it to be > 0 */
int32_t m_nrhs;


/** output stream */
FILE* file_out;
/** fixed-size input buffer of 10000 chars */
char input[10000];
/** echo flag */
bool echo;

/** string pointer for the legacy command syntax -- presumably used by the
 * *_from_str helpers; confirm in the implementation
 */
char* m_legacy_strptr;
01130 };
01131
01133 typedef bool (CSGInterface::*CSGInterfacePtr)();
01134
01135 #ifndef DOXYGEN_SHOULD_SKIP_THIS
01136
01137 typedef struct {
01139 const char* command;
01141 CSGInterfacePtr method;
01143 const char* usage_prefix;
01145 const char* usage_suffix;
01146 } CSGInterfaceMethod;
01147 }
01148 #endif
01149
01150 #endif // __SGINTERFACE__H_