SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SGInterface.cpp
Go to the documentation of this file.
4 
5 #include <shogun/lib/config.h>
6 #include <shogun/lib/DataType.h>
7 #include <shogun/lib/SGNDArray.h>
8 #include <shogun/lib/common.h>
11 #include <shogun/lib/Hash.h>
12 #include <shogun/lib/Map.h>
13 #include <shogun/lib/Signal.h>
14 
16 #include <shogun/lib/external/pr_loqo.h>
31 
33 
34 #include <shogun/structure/Plif.h>
40 
41 #include <ctype.h>
42 
43 using namespace shogun;
44 
// Global pointer to a CSGInterface instance; starts out as NULL.
// NOTE(review): nothing in this part of the file assigns it - presumably a
// language-specific front-end installs the concrete interface; confirm there.
45 CSGInterface* interface=NULL;
47 
// Usage-string helpers used when filling the sg_methods table.
//
// Each USAGE* macro expands to TWO comma-separated string literals (the text
// placed in front of and behind a command name), so a single macro invocation
// supplies two consecutive initializers of a table entry. The `method`
// argument is accepted by every variant but is not used in any expansion.
//   USAGE(method)          - no inputs, no outputs
//   USAGE_I(method, in)    - inputs only
//   USAGE_O(method, out)   - outputs only
//   USAGE_IO(method,in,out)- inputs and outputs
//   USAGE_COMMA            - separator placed between listed arguments
//   USAGE_STR              - quote character wrapped around literal options
//
// Command-line flavour: no sg('...') wrapper, blank-separated arguments,
// no quote character.
48 #if defined(HAVE_CMDLINE)
49 #define USAGE(method) "", ""
50 #define USAGE_I(method, in) "", " " in ""
51 #define USAGE_O(method, out) "" out " = ", ""
52 #define USAGE_IO(method, in, out) "" out " = ", " " in ""
53 #define USAGE_COMMA " "
54 #define USAGE_STR ""
// R flavour: sg('...') call form, outputs assigned with "<-".
55 #elif defined(HAVE_R)
56 #define USAGE(method) "sg('", "')"
57 #define USAGE_I(method, in) "sg('", "', " in ")"
58 #define USAGE_O(method, out) "[" out "] <- sg('", "')"
59 #define USAGE_IO(method, in, out) "[" out "] <- sg('", "', " in ")"
60 #define USAGE_COMMA ", "
61 #define USAGE_STR "'"
// Every other build: sg('...') call form, outputs assigned with "=".
62 #else
63 #define USAGE(method) "sg('", "')"
64 #define USAGE_I(method, in) "sg('", "', " in ")"
65 #define USAGE_O(method, out) "[" out "]=sg('", "')"
66 #define USAGE_IO(method, in, out) "[" out "]=sg('", "', " in ")"
67 #define USAGE_COMMA ", "
68 #define USAGE_STR "'"
69 #endif
70 
// Table of commands understood by the interface.
//
// Every entry is a brace initializer holding, in order: the command name
// (an N_* constant or a string literal), a pointer to the CSGInterface member
// function that handles it, and the usage strings produced by one of the
// USAGE* macros (which expand to two string literals each).
// Rows whose handler is NULL and whose name is a plain string ("Features",
// "Kernel", "Classifier", ...) carry no handler - presumably section headings
// for help output; confirm against the code that walks this table.
// The table is terminated by an all-NULL sentinel entry.
//
// NOTE(review): this listing skips source lines (the embedded line numbers
// jump), so many entries below appear without their name and/or usage line;
// consult the original file before editing individual rows.
// NOTE(review): the N_RESHAPE usage opens USAGE_STR before "TRAIN|TEST" but
// never closes it, and the "Features" heading row passes four initializers
// while the other heading rows pass three - worth a consistency pass.
71 CSGInterfaceMethod sg_methods[]=
72 {
73  { "Features", NULL, NULL, NULL },
74  {
75  N_PR_LOQO,
76  (&CSGInterface::cmd_pr_loqo),
78  "'Var1', Var1, 'Var2', Var2", "results")
79  },
80  {
82  (&CSGInterface::cmd_load_features),
84  "filename" USAGE_COMMA "feature_class" USAGE_COMMA "type" USAGE_COMMA "target[" USAGE_COMMA "size[" USAGE_COMMA "comp_features]]")
85  },
86  {
88  (&CSGInterface::cmd_save_features),
89  USAGE_I(N_SAVE_FEATURES, "filename" USAGE_COMMA "type" USAGE_COMMA "target")
90  },
91  {
93  (&CSGInterface::cmd_clean_features),
95  },
96  {
98  (&CSGInterface::cmd_get_features),
99  USAGE_IO(N_GET_FEATURES, USAGE_STR "TRAIN|TEST" USAGE_STR, "features")
100  },
101  {
103  (&CSGInterface::cmd_add_features),
105  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
106  },
107  {
109  (&CSGInterface::cmd_add_multiple_features),
111  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "repetitions" USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
112  },
113  {
115  (&CSGInterface::cmd_add_dotfeatures),
117  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
118  },
119  {
121  (&CSGInterface::cmd_set_features),
123  USAGE_STR "TRAIN|TEST" USAGE_STR
124  USAGE_COMMA "features["
125  USAGE_COMMA "DNABINFILE|<ALPHABET>]["
126  USAGE_COMMA "[from_position_list|slide_window]"
127  USAGE_COMMA "window size"
128  USAGE_COMMA "[position_list|shift]"
129  USAGE_COMMA "skip")
130  },
131  {
133  (&CSGInterface::cmd_set_reference_features),
135  },
136  {
138  (&CSGInterface::cmd_del_last_features),
140  },
141  {
142  N_CONVERT,
143  (&CSGInterface::cmd_convert),
144  USAGE_I(N_CONVERT, USAGE_STR "TRAIN|TEST" USAGE_STR
145  USAGE_COMMA "from_class"
146  USAGE_COMMA "from_type"
147  USAGE_COMMA "to_class"
148  USAGE_COMMA "to_type["
149  USAGE_COMMA "order"
150  USAGE_COMMA "start"
151  USAGE_COMMA "gap"
152  USAGE_COMMA "reversed]")
153  },
154  {
155  N_RESHAPE,
156  (&CSGInterface::cmd_reshape),
157  USAGE_I(N_RESHAPE, USAGE_STR "TRAIN|TEST"
158  USAGE_COMMA "num_feat"
159  USAGE_COMMA "num_vec")
160  },
161  {
163  (&CSGInterface::cmd_load_labels),
164  USAGE_I(N_LOAD_LABELS, "filename"
165  USAGE_COMMA USAGE_STR "TRAIN|TARGET" USAGE_STR)
166  },
167  {
168  N_SET_LABELS,
169  (&CSGInterface::cmd_set_labels),
171  USAGE_COMMA "labels")
172  },
173  {
174  N_GET_LABELS,
175  (&CSGInterface::cmd_get_labels),
176  USAGE_IO(N_GET_LABELS, USAGE_STR "TRAIN|TEST" USAGE_STR, "labels")
177  },
178 
179 
180  { "Kernel", NULL, NULL },
181  {
183  (&CSGInterface::cmd_set_kernel_normalization),
184  USAGE_I(N_SET_KERNEL_NORMALIZATION, "IDENTITY|AVGDIAG|SQRTDIAG|FIRSTELEMENT|VARIANCE|ZEROMEANCENTER"
185  USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
186  },
187  {
188  N_SET_KERNEL,
189  (&CSGInterface::cmd_set_kernel),
190  USAGE_I(N_SET_KERNEL, "type" USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
191  },
192  {
193  N_ADD_KERNEL,
194  (&CSGInterface::cmd_add_kernel),
195  USAGE_I(N_ADD_KERNEL, "weight" USAGE_COMMA "kernel-specific parameters")
196  },
197  {
199  (&CSGInterface::cmd_del_last_kernel),
201  },
202  {
204  (&CSGInterface::cmd_init_kernel),
206  },
207  {
209  (&CSGInterface::cmd_clean_kernel),
211  },
212  {
214  (&CSGInterface::cmd_save_kernel),
215  USAGE_I(N_SAVE_KERNEL, "filename" USAGE_COMMA USAGE_STR "TRAIN|TEST" USAGE_STR)
216  },
217  {
219  (&CSGInterface::cmd_get_kernel_matrix),
220  USAGE_IO(N_GET_KERNEL_MATRIX, "[" USAGE_STR "TRAIN|TEST" USAGE_STR, "K]")
221  },
222  {
224  (&CSGInterface::cmd_set_WD_position_weights),
226  },
227  {
229  (&CSGInterface::cmd_get_subkernel_weights),
231  },
232  {
234  (&CSGInterface::cmd_set_subkernel_weights),
236  },
237  {
239  (&CSGInterface::cmd_set_subkernel_weights_combined),
241  },
242  {
244  (&CSGInterface::cmd_get_dotfeature_weights_combined),
246  },
247  {
249  (&CSGInterface::cmd_set_dotfeature_weights_combined),
251  },
252  {
254  (&CSGInterface::cmd_set_last_subkernel_weights),
256  },
257  {
259  (&CSGInterface::cmd_get_WD_position_weights),
261  },
262  {
264  (&CSGInterface::cmd_get_last_subkernel_weights),
266  },
267  {
269  (&CSGInterface::cmd_compute_by_subkernels),
271  },
272  {
274  (&CSGInterface::cmd_init_kernel_optimization),
276  },
277  {
279  (&CSGInterface::cmd_get_kernel_optimization),
281  },
282  {
284  (&CSGInterface::cmd_delete_kernel_optimization),
286  },
287  {
289  (&CSGInterface::cmd_use_diagonal_speedup),
291  },
292  {
294  (&CSGInterface::cmd_set_kernel_optimization_type),
295  USAGE_I(N_SET_KERNEL_OPTIMIZATION_TYPE, USAGE_STR "FASTBUTMEMHUNGRY|SLOWBUTMEMEFFICIENT" USAGE_STR)
296  },
297  {
298  N_SET_SOLVER,
299  (&CSGInterface::cmd_set_solver),
300  USAGE_I(N_SET_SOLVER, USAGE_STR "AUTO|CPLEX|GLPK|INTERNAL" USAGE_STR)
301  },
302  {
304  (&CSGInterface::cmd_set_constraint_generator),
305  USAGE_I(N_SET_CONSTRAINT_GENERATOR, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
306  "|SVMLIGHT|LIGHT|SVMLIGHT_ONECLASS|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
307  USAGE_STR)
308  },
309  {
311  (&CSGInterface::cmd_set_prior_probs),
312  USAGE_I(N_SET_PRIOR_PROBS, USAGE_STR "pos probs, neg_probs" USAGE_STR)
313  },
314  {
316  (&CSGInterface::cmd_set_prior_probs_from_labels),
318  },
319 #ifdef USE_SVMLIGHT
320  {
322  (&CSGInterface::cmd_resize_kernel_cache),
324  },
325 #endif //USE_SVMLIGHT
326 
327 
328  { "Distance", NULL, NULL },
329  {
331  (&CSGInterface::cmd_set_distance),
332  USAGE_I(N_SET_DISTANCE, "type" USAGE_COMMA "data type[" USAGE_COMMA "distance-specific parameters]")
333  },
334  {
336  (&CSGInterface::cmd_init_distance),
338  },
339  {
341  (&CSGInterface::cmd_get_distance_matrix),
343  },
344 
345 
346  { "Classifier", NULL, NULL },
347  {
348  N_CLASSIFY,
349  (&CSGInterface::cmd_classify),
350  USAGE_O(N_CLASSIFY, "result")
351  },
352  {
354  (&CSGInterface::cmd_classify),
355  USAGE_O(N_SVM_CLASSIFY, "result")
356  },
357  {
359  (&CSGInterface::cmd_classify_example),
360  USAGE_IO(N_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
361  },
362  {
364  (&CSGInterface::cmd_classify_example),
365  USAGE_IO(N_SVM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
366  },
367  {
369  (&CSGInterface::cmd_get_classifier),
370  USAGE_IO(N_GET_CLASSIFIER, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "weights")
371  },
372  {
374  (&CSGInterface::cmd_get_classifier),
375  USAGE_O(N_GET_CLUSTERING, "radi" USAGE_COMMA "centers|merge_distances" USAGE_COMMA "pairs")
376  },
377  {
378  N_NEW_SVM,
379  (&CSGInterface::cmd_new_classifier),
380  USAGE_I(N_NEW_SVM, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
381  "|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
382  "|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
383  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
384  "|LPM|LPBOOST|KNN" USAGE_STR)
385  },
386  {
388  (&CSGInterface::cmd_new_classifier),
389  USAGE_I(N_NEW_CLASSIFIER, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS"
390  "|LIBSVM|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
391  "|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
392  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
393  "|LPM|LPBOOST|KNN" USAGE_STR)
394  },
395  {
397  (&CSGInterface::cmd_new_classifier),
398  USAGE_I(N_NEW_REGRESSION, USAGE_STR "SVRLIGHT|LIBSVR|KRR" USAGE_STR)
399  },
400  {
402  (&CSGInterface::cmd_new_classifier),
403  USAGE_I(N_NEW_CLUSTERING, USAGE_STR "KMEANS|HIERARCHICAL" USAGE_STR)
404  },
405  {
407  (&CSGInterface::cmd_load_classifier),
408  USAGE_O(N_LOAD_CLASSIFIER, "filename" USAGE_COMMA "type")
409  },
410  {
412  (&CSGInterface::cmd_save_classifier),
413  USAGE_I(N_SAVE_CLASSIFIER, "filename")
414  },
415  {
417  (&CSGInterface::cmd_get_num_svms),
418  USAGE_O(N_GET_NUM_SVMS, "number of SVMs in MultiClassSVM")
419  },
420  {
421  N_GET_SVM,
422  (&CSGInterface::cmd_get_svm),
423  USAGE_IO(N_GET_SVM, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "alphas")
424  },
425  {
426  N_SET_SVM,
427  (&CSGInterface::cmd_set_svm),
428  USAGE_I(N_SET_SVM, "bias" USAGE_COMMA "alphas")
429  },
430  {
432  (&CSGInterface::cmd_set_linear_classifier),
434  },
435  {
437  (&CSGInterface::cmd_get_svm_objective),
438  USAGE_O(N_GET_SVM_OBJECTIVE, "objective")
439  },
440  {
442  (&CSGInterface::cmd_compute_svm_primal_objective),
444  },
445  {
447  (&CSGInterface::cmd_compute_svm_dual_objective),
449  },
450  {
452  (&CSGInterface::cmd_compute_svm_primal_objective),
454  },
455  {
457  (&CSGInterface::cmd_compute_mkl_dual_objective),
459  },
460  {
462  (&CSGInterface::cmd_compute_relative_mkl_duality_gap),
464  },
465  {
467  (&CSGInterface::cmd_compute_absolute_mkl_duality_gap),
469  },
470  {
472  (&CSGInterface::cmd_do_auc_maximization),
474  },
475  {
477  (&CSGInterface::cmd_set_perceptron_parameters),
478  USAGE_I(N_SET_PERCEPTRON_PARAMETERS, "learnrate" USAGE_COMMA "maxiter")
479  },
480  {
482  (&CSGInterface::cmd_train_classifier),
483  USAGE_I(N_TRAIN_CLASSIFIER, "[classifier-specific parameters]")
484  },
485  {
487  (&CSGInterface::cmd_train_classifier),
489  },
490  {
492  (&CSGInterface::cmd_train_classifier),
494  },
495  {
496  N_SVM_TRAIN,
497  (&CSGInterface::cmd_train_classifier),
498  USAGE_I(N_SVM_TRAIN, "[classifier-specific parameters]")
499  },
500  {
501  N_SVMQPSIZE,
502  (&CSGInterface::cmd_set_svm_qpsize),
503  USAGE_I(N_SVMQPSIZE, "size")
504  },
505  {
507  (&CSGInterface::cmd_set_svm_max_qpsize),
508  USAGE_I(N_SVMMAXQPSIZE, "size")
509  },
510  {
511  N_SVMBUFSIZE,
512  (&CSGInterface::cmd_set_svm_bufsize),
513  USAGE_I(N_SVMBUFSIZE, "size")
514  },
515  {
516  N_C,
517  (&CSGInterface::cmd_set_svm_C),
518  USAGE_I(N_C, "C1[" USAGE_COMMA "C2]")
519  },
520  {
522  (&CSGInterface::cmd_set_svm_epsilon),
523  USAGE_I(N_SVM_EPSILON, "epsilon")
524  },
525  {
527  (&CSGInterface::cmd_set_svr_tube_epsilon),
528  USAGE_I(N_SVR_TUBE_EPSILON, "tube_epsilon")
529  },
530  {
531  N_SVM_NU,
532  (&CSGInterface::cmd_set_svm_nu),
533  USAGE_I(N_SVM_NU, "nu")
534  },
535  {
537  (&CSGInterface::cmd_set_svm_mkl_parameters),
538  USAGE_I(N_MKL_PARAMETERS, "weight_epsilon" USAGE_COMMA "C_MKL [" USAGE_COMMA "mkl_norm ]")
539  },
540  {
541  N_ENT_LAMBDA,
542  (&CSGInterface::cmd_set_elasticnet_lambda),
543  USAGE_I(N_ENT_LAMBDA, "ent_lambda")
544  },
545  {
547  (&CSGInterface::cmd_set_mkl_block_norm),
548  USAGE_I(N_MKL_BLOCK_NORM, "mkl_block_norm")
549  },
550  {
552  (&CSGInterface::cmd_set_max_train_time),
553  USAGE_I(N_SVM_MAX_TRAIN_TIME, "max_train_time")
554  },
555  {
557  (&CSGInterface::cmd_set_svm_shrinking_enabled),
558  USAGE_I(N_USE_SHRINKING, "enable_shrinking")
559  },
560  {
562  (&CSGInterface::cmd_set_svm_batch_computation_enabled),
563  USAGE_I(N_USE_BATCH_COMPUTATION, "enable_batch_computation")
564  },
565  {
566  N_USE_LINADD,
567  (&CSGInterface::cmd_set_svm_linadd_enabled),
568  USAGE_I(N_USE_LINADD, "enable_linadd")
569  },
570  {
572  (&CSGInterface::cmd_set_svm_bias_enabled),
573  USAGE_I(N_SVM_USE_BIAS, "enable_bias")
574  },
575  {
577  (&CSGInterface::cmd_set_mkl_interleaved_enabled),
578  USAGE_I(N_MKL_USE_INTERLEAVED_OPTIMIZATION, "enable_interleaved_optimization")
579  },
580  {
581  N_KRR_TAU,
582  (&CSGInterface::cmd_set_krr_tau),
583  USAGE_I(N_KRR_TAU, "tau")
584  },
585 
586 
587  { "Preprocessors", NULL, NULL },
588  {
590  (&CSGInterface::cmd_add_preproc),
591  USAGE_I(N_ADD_PREPROC, "preproc[, preproc-specific parameters]")
592  },
593  {
595  (&CSGInterface::cmd_del_preproc),
597  },
598  {
600  (&CSGInterface::cmd_attach_preproc),
602  },
603  {
605  (&CSGInterface::cmd_clean_preproc),
607  },
608 
609  { "Converters", NULL, NULL },
610  {
612  (&CSGInterface::cmd_set_converter),
614  },
615  {
617  (&CSGInterface::cmd_apply_converter),
618  USAGE_O(N_APPLY_CONVERTER, "conv_features")
619  },
620  {
621  N_EMBED,
622  (&CSGInterface::cmd_embed),
623  USAGE_IO(N_EMBED,"target dim","embedding")
624  },
625 
626 
627  { "HMM", NULL, NULL },
628  {
629  N_NEW_HMM,
630  (&CSGInterface::cmd_new_hmm),
631  USAGE_I(N_NEW_HMM, "N" USAGE_COMMA "M")
632  },
633  {
634  N_LOAD_HMM,
635  (&CSGInterface::cmd_load_hmm),
636  USAGE_I(N_LOAD_HMM, "filename")
637  },
638  {
639  N_SAVE_HMM,
640  (&CSGInterface::cmd_save_hmm),
641  USAGE_I(N_SAVE_HMM, "filename[" USAGE_COMMA "save_binary]")
642  },
643  {
644  N_GET_HMM,
645  (&CSGInterface::cmd_get_hmm),
647  },
648  {
649  N_APPEND_HMM,
650  (&CSGInterface::cmd_append_hmm),
652  },
653  {
655  (&CSGInterface::cmd_append_model),
656  USAGE_I(N_APPEND_MODEL, USAGE_STR "filename" USAGE_STR "[" USAGE_COMMA "base1" USAGE_COMMA "base2]")
657  },
658  {
659  N_SET_HMM,
660  (&CSGInterface::cmd_set_hmm),
662  },
663  {
664  N_SET_HMM_AS,
665  (&CSGInterface::cmd_set_hmm_as),
666  USAGE_I(N_SET_HMM_AS, "POS|NEG|TEST")
667  },
668  {
669  N_CHOP,
670  (&CSGInterface::cmd_set_chop),
671  USAGE_I(N_CHOP, "chop")
672  },
673  {
674  N_PSEUDO,
675  (&CSGInterface::cmd_set_pseudo),
676  USAGE_I(N_PSEUDO, "pseudo")
677  },
678  {
680  (&CSGInterface::cmd_load_definitions),
681  USAGE_I(N_LOAD_DEFINITIONS, "filename" USAGE_COMMA "init")
682  },
683  {
685  (&CSGInterface::cmd_hmm_classify),
686  USAGE_O(N_HMM_CLASSIFY, "result")
687  },
688  {
690  (&CSGInterface::cmd_one_class_linear_hmm_classify),
692  },
693  {
695  (&CSGInterface::cmd_one_class_hmm_classify),
697  },
698  {
700  (&CSGInterface::cmd_one_class_hmm_classify_example),
701  USAGE_IO(N_ONE_CLASS_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
702  },
703  {
705  (&CSGInterface::cmd_hmm_classify_example),
706  USAGE_IO(N_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
707  },
708  {
709  N_OUTPUT_HMM,
710  (&CSGInterface::cmd_output_hmm),
712  },
713  {
715  (&CSGInterface::cmd_output_hmm_defined),
717  },
718  {
720  (&CSGInterface::cmd_hmm_likelihood),
721  USAGE_O(N_HMM_LIKELIHOOD, "likelihood")
722  },
723  {
724  N_LIKELIHOOD,
725  (&CSGInterface::cmd_likelihood),
727  },
728  {
730  (&CSGInterface::cmd_save_likelihood),
731  USAGE_I(N_SAVE_LIKELIHOOD, "filename[" USAGE_COMMA "save_binary]")
732  },
733  {
735  (&CSGInterface::cmd_get_viterbi_path),
736  USAGE_IO(N_GET_VITERBI_PATH, "dim", "path" USAGE_COMMA "likelihood")
737  },
738  {
740  (&CSGInterface::cmd_viterbi_train_defined),
742  },
743  {
745  (&CSGInterface::cmd_viterbi_train),
747  },
748  {
750  (&CSGInterface::cmd_baum_welch_train),
752  },
753  {
755  (&CSGInterface::cmd_baum_welch_train_defined),
757  },
758  {
760  (&CSGInterface::cmd_baum_welch_trans_train),
762  },
763  {
765  (&CSGInterface::cmd_linear_train),
767  },
768  {
769  N_SAVE_PATH,
770  (&CSGInterface::cmd_save_path),
771  USAGE_I(N_SAVE_PATH, "filename[" USAGE_COMMA "save_binary]")
772  },
773  {
775  (&CSGInterface::cmd_convergence_criteria),
776  USAGE_I(N_CONVERGENCE_CRITERIA, "num_iterations" USAGE_COMMA "epsilon")
777  },
778  {
779  N_NORMALIZE,
780  (&CSGInterface::cmd_normalize),
781  USAGE_I(N_NORMALIZE, "[keep_dead_states]")
782  },
783  {
784  N_ADD_STATES,
785  (&CSGInterface::cmd_add_states),
786  USAGE_I(N_ADD_STATES, "states" USAGE_COMMA "value")
787  },
788  {
790  (&CSGInterface::cmd_permutation_entropy),
791  USAGE_I(N_PERMUTATION_ENTROPY, "width" USAGE_COMMA "seqnum")
792  },
793  {
795  (&CSGInterface::cmd_relative_entropy),
796  USAGE_O(N_RELATIVE_ENTROPY, "result")
797  },
798  {
799  N_ENTROPY,
800  (&CSGInterface::cmd_entropy),
801  USAGE_O(N_ENTROPY, "result")
802  },
803  {
804  (char*) N_SET_FEATURE_MATRIX,
805  (&CSGInterface::cmd_set_feature_matrix),
806  (char*) USAGE_I(N_SET_FEATURE_MATRIX, "features")
807  },
808  {
810  (&CSGInterface::cmd_set_feature_matrix_sparse),
811  (char*) USAGE_I(N_SET_FEATURE_MATRIX_SPARSE, "sp1" USAGE_COMMA "sp2" )
812  },
813  {
815  (&CSGInterface::cmd_new_plugin_estimator),
816  USAGE_I(N_NEW_PLUGIN_ESTIMATOR, "pos_pseudo" USAGE_COMMA "neg_pseudo")
817  },
818  {
820  (&CSGInterface::cmd_train_estimator),
822  },
823  {
825  (&CSGInterface::cmd_plugin_estimate_classify_example),
826  USAGE_IO(N_PLUGIN_ESTIMATE_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
827  },
828  {
830  (&CSGInterface::cmd_plugin_estimate_classify),
832  },
833  {
835  (&CSGInterface::cmd_set_plugin_estimate),
836  USAGE_I(N_SET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
837  },
838  {
840  (&CSGInterface::cmd_get_plugin_estimate),
841  USAGE_O(N_GET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
842  },
843  { "Signals", NULL, NULL },
844  {
846  (&CSGInterface::cmd_signals_set_model),
848  },
849  {
851  (&CSGInterface::cmd_signals_set_positions),
852  USAGE_I(N_SIGNALS_SET_POSITIONS, "positions")
853  },
854  {
856  (&CSGInterface::cmd_signals_set_labels),
857  USAGE_I(N_SIGNALS_SET_LABELS, "labels")
858  },
859  {
861  (&CSGInterface::cmd_signals_set_split),
862  USAGE_I(N_SIGNALS_SET_SPLIT, "split")
863  },
864  {
866  (&CSGInterface::cmd_signals_set_train_mask),
868  },
869  {
871  (&CSGInterface::cmd_signals_add_feature),
872  USAGE_I(N_SIGNALS_ADD_FEATURE, "feature")
873  },
874  {
876  (&CSGInterface::cmd_signals_add_kernel),
877  USAGE_I(N_SIGNALS_ADD_KERNEL, "kernelparam")
878  },
879  {
881  (&CSGInterface::cmd_signals_run),
882  USAGE_I(N_SIGNALS_RUN, "arg1")
883  },
884  { "Structure", NULL, NULL },
885  {
886  N_BEST_PATH,
887  (&CSGInterface::cmd_best_path),
888  USAGE_I(N_BEST_PATH, "from" USAGE_COMMA "to")
889  },
890  {
892  (&CSGInterface::cmd_best_path_2struct),
894  USAGE_COMMA "q"
895  USAGE_COMMA "cmd_trans"
896  USAGE_COMMA "seq"
897  USAGE_COMMA "pos"
898  USAGE_COMMA "genestr"
899  USAGE_COMMA "penalties"
900  USAGE_COMMA "penalty_info"
901  USAGE_COMMA "nbest"
902  USAGE_COMMA "content_weights"
903  USAGE_COMMA "segment_sum_weights",
904  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
905  },
906  {
907  (char*) N_SET_PLIF_STRUCT,
908  (&CSGInterface::cmd_set_plif_struct),
909  (char*) USAGE_I(N_SET_PLIF_STRUCT, "id"
910  USAGE_COMMA "name"
911  USAGE_COMMA "limits"
912  USAGE_COMMA "penalties"
913  USAGE_COMMA "transform"
914  USAGE_COMMA "min_value"
915  USAGE_COMMA "max_value"
916  USAGE_COMMA "use_cache"
917  USAGE_COMMA "use_svm")
918  },
919  {
920  (char*) N_GET_PLIF_STRUCT,
921  (&CSGInterface::cmd_get_plif_struct),
922  (char*) USAGE_O(N_GET_PLIF_STRUCT, "id"
923  USAGE_COMMA "name"
924  USAGE_COMMA "limits"
925  USAGE_COMMA "penalties"
926  USAGE_COMMA "transform"
927  USAGE_COMMA "min_value"
928  USAGE_COMMA "max_value"
929  USAGE_COMMA "use_cache"
930  USAGE_COMMA "use_svm")
931  },
932  {
933  (char*) N_PRECOMPUTE_SUBKERNELS,
934  (&CSGInterface::cmd_precompute_subkernels),
936  },
937  {
939  (&CSGInterface::cmd_precompute_content_svms),
940  (char*) USAGE_I(N_PRECOMPUTE_CONTENT_SVMS, "sequence"
941  USAGE_COMMA "position_list"
942  USAGE_COMMA "weights")
943  },
944  {
945  (char*) N_GET_LIN_FEAT,
946  (&CSGInterface::cmd_get_lin_feat),
947  (char*) USAGE_O(N_GET_LIN_FEAT, "lin_feat")
948  },
949  {
950  (char*) N_SET_LIN_FEAT,
951  (&CSGInterface::cmd_set_lin_feat),
952  (char*) USAGE_I(N_SET_LIN_FEAT, "lin_feat")
953  },
954  {
955  (char*) N_INIT_DYN_PROG,
956  (&CSGInterface::cmd_init_dyn_prog),
957  (char*) USAGE_I(N_INIT_DYN_PROG, "num_svms")
958  },
959  {
960  (char*) N_CLEAN_UP_DYN_PROG,
961  (&CSGInterface::cmd_clean_up_dyn_prog),
962  (char*) USAGE(N_CLEAN_UP_DYN_PROG)
963  },
964  {
965  (char*) N_INIT_INTRON_LIST,
966  (&CSGInterface::cmd_init_intron_list),
967  (char*) USAGE_I(N_INIT_INTRON_LIST, "start_positions"
968  USAGE_COMMA "end_positions"
969  USAGE_COMMA "quality")
970  },
971  {
973  (&CSGInterface::cmd_precompute_tiling_features),
974  (char*) USAGE_I(N_PRECOMPUTE_TILING_FEATURES, "intensities"
975  USAGE_COMMA "probe_pos"
976  USAGE_COMMA "tiling_plif_ids")
977  },
978  {
980  (&CSGInterface::cmd_long_transition_settings),
981  (char*) USAGE_I(N_LONG_TRANSITION_SETTINGS, "use_long_transitions"
982  USAGE_COMMA "threshold"
983  USAGE_COMMA "max_len")
984  },
985 
986  {
987  (char*) N_SET_MODEL,
988  (&CSGInterface::cmd_set_model),
989  (char*) USAGE_I(N_SET_MODEL, "content_weights"
990  USAGE_COMMA "transition_pointers"
991  USAGE_COMMA "use_orf"
992  USAGE_COMMA "mod_words")
993  },
994 
995  {
996  (char*) N_BEST_PATH_TRANS,
997  (&CSGInterface::cmd_best_path_trans),
999  USAGE_COMMA "q"
1000  USAGE_COMMA "nbest"
1001  USAGE_COMMA "seq_path"
1002  USAGE_COMMA "a_trans"
1003  USAGE_COMMA "segment_loss",
1004  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
1005  },
1006  {
1008  (&CSGInterface::cmd_best_path_trans_deriv),
1010  USAGE_COMMA "my_path"
1011  USAGE_COMMA "my_pos"
1012  USAGE_COMMA "p"
1013  USAGE_COMMA "q"
1014  USAGE_COMMA "cmd_trans"
1015  USAGE_COMMA "seq"
1016  USAGE_COMMA "pos"
1017  USAGE_COMMA "genestr"
1018  USAGE_COMMA "penalties"
1019  USAGE_COMMA "state_signals"
1020  USAGE_COMMA "penalty_info"
1021  USAGE_COMMA "dict_weights"
1022  USAGE_COMMA "mod_words ["
1023  USAGE_COMMA "segment_loss"
1024  USAGE_COMMA "segmend_ids_mask]", "p_deriv"
1025  USAGE_COMMA "q_deriv"
1026  USAGE_COMMA "cmd_deriv"
1027  USAGE_COMMA "penalties_deriv"
1028  USAGE_COMMA "my_scores"
1029  USAGE_COMMA "my_loss")
1030  },
1031 
1032  { "POIM", NULL, NULL },
1033  {
1035  (&CSGInterface::cmd_compute_POIM_WD),
1036  USAGE_IO(N_COMPUTE_POIM_WD, "max_order" USAGE_COMMA "distribution", "W")
1037  },
1038  {
1040  (&CSGInterface::cmd_get_SPEC_consensus),
1042  },
1043  {
1045  (&CSGInterface::cmd_get_SPEC_scoring),
1046  USAGE_IO(N_GET_SPEC_SCORING, "max_order", "W")
1047  },
1048  {
1050  (&CSGInterface::cmd_get_WD_consensus),
1052  },
1053  {
1055  (&CSGInterface::cmd_get_WD_scoring),
1056  USAGE_IO(N_GET_WD_SCORING, "max_order", "W")
1057  },
1058 
1059 
1060  { "Utility", NULL, NULL },
1061  {
1062  N_CRC,
1063  (&CSGInterface::cmd_crc),
1064  USAGE_IO(N_CRC, "string", "crc32")
1065  },
1066  {
1067  N_SYSTEM,
1068  (&CSGInterface::cmd_system),
1069  USAGE_I(N_SYSTEM, "system_command")
1070  },
1071  {
1072  N_EXIT,
1073  (&CSGInterface::cmd_exit),
1074  USAGE(N_EXIT)
1075  },
1076  {
1077  N_QUIT,
1078  (&CSGInterface::cmd_exit),
1079  USAGE(N_QUIT)
1080  },
1081  {
1082  N_EXEC,
1083  (&CSGInterface::cmd_exec),
1084  USAGE_I(N_EXEC, "filename")
1085  },
1086  {
1087  N_SET_OUTPUT,
1088  (&CSGInterface::cmd_set_output),
1089  USAGE_I(N_SET_OUTPUT, USAGE_STR "STDERR|STDOUT|filename" USAGE_STR)
1090  },
1091  {
1093  (&CSGInterface::cmd_set_threshold),
1094  USAGE_I(N_SET_THRESHOLD, "threshold")
1095  },
1096  {
1097  N_INIT_RANDOM,
1098  (&CSGInterface::cmd_init_random),
1099  USAGE_I(N_INIT_RANDOM, "value_to_initialize_RNG_with")
1100  },
1101  {
1102  N_THREADS,
1103  (&CSGInterface::cmd_set_num_threads),
1104  USAGE_I(N_THREADS, "num_threads")
1105  },
1106  {
1108  (&CSGInterface::cmd_translate_string),
1110  "string, order, start", "translation")
1111  },
1112  {
1113  N_CLEAR,
1114  (&CSGInterface::cmd_clear),
1115  USAGE(N_CLEAR)
1116  },
1117  {
1118  N_TIC,
1119  (&CSGInterface::cmd_tic),
1120  USAGE(N_TIC)
1121  },
1122  {
1123  N_TOC,
1124  (&CSGInterface::cmd_toc),
1125  USAGE(N_TOC)
1126  },
1127  {
1128  N_PRINT,
1129  (&CSGInterface::cmd_print),
1130  USAGE_I(N_PRINT, "msg")
1131  },
1132  {
1133  N_ECHO,
1134  (&CSGInterface::cmd_echo),
1135  USAGE_I(N_ECHO, "level")
1136  },
1137  {
1138  N_LOGLEVEL,
1139  (&CSGInterface::cmd_loglevel),
1140  USAGE_I(N_LOGLEVEL, USAGE_STR "ALL|DEBUG|INFO|NOTICE|WARN|ERROR|CRITICAL|ALERT|EMERGENCY" USAGE_STR)
1141  },
1142  {
1144  (&CSGInterface::cmd_syntax_highlight),
1146  },
1147  {
1148  N_PROGRESS,
1149  (&CSGInterface::cmd_progress),
1151  },
1152  {
1153  N_GET_VERSION,
1154  (&CSGInterface::cmd_get_version),
1155  USAGE_O(N_GET_VERSION, "version")
1156  },
1157  {
1158  N_HELP,
1159  (&CSGInterface::cmd_help),
1160  USAGE(N_HELP)
1161  },
1162  {
1163  N_WHOS,
1164  (&CSGInterface::cmd_whos),
1165  USAGE(N_WHOS)
1166  },
1167  {
1169  (&CSGInterface::cmd_send_command),
1170  NULL
1171  },
1172  {
1173  N_RUN_PYTHON,
1174  (&CSGInterface::cmd_run_python),
1176  "'Var1', Var1, 'Var2', Var2,..., python_function", "results")
1177  },
1178  {
1179  N_RUN_OCTAVE,
1180  (&CSGInterface::cmd_run_octave),
1182  "'Var1', Var1, 'Var2', Var2,..., octave_function", "results")
1183  },
1184  {
1185  N_RUN_R,
1186  (&CSGInterface::cmd_run_r),
1187  USAGE_IO(N_RUN_R,
1188  "'Var1', Var1, 'Var2', Var2,..., r_function", "results")
1189  },
1190  {NULL, NULL, NULL} /* Sentinel */
1191 };
1192 
1193 
// Constructs the interface.
//
// Allocates one CGUI* helper object per subsystem (classifier, distance,
// features, hmm, kernel, labels, math, plugin estimate, preprocessor, time,
// structure, converter), handing each a pointer back to this interface.
// The ui_signals helper is commented out and therefore not created.
//
// print_copyright: when true, prints the version information followed by
// start-up diagnostics (RNG seeding message and either the log-table size or
// the determined log range, depending on USE_LOGCACHE).
//
// Finishes by calling reset() to put the argument counters and output state
// into their initial values.
1194 CSGInterface::CSGInterface(bool print_copyright)
1195 : CSGObject(),
1196  ui_classifier(new CGUIClassifier(this)),
1197  ui_distance(new CGUIDistance(this)),
1198  ui_features(new CGUIFeatures(this)),
1199  ui_hmm(new CGUIHMM(this)),
1200  ui_kernel(new CGUIKernel(this)),
1201  ui_labels(new CGUILabels(this)),
1202  ui_math(new CGUIMath(this)),
1203  ui_pluginestimate(new CGUIPluginEstimate(this)),
1204  ui_preproc(new CGUIPreprocessor(this)),
1205  ui_time(new CGUITime(this)),
1206  ui_structure(new CGUIStructure(this)),
1207  ui_converter(new CGUIConverter(this))/*,
1208 / ui_signals(new CGUISignals(this))*/
1209 {
1210  if (print_copyright)
1211  {
1212  version->print_version();
1213  SG_PRINT("( seeding random number generator with %u (seed size %d))\n",
// NOTE(review): the continuation of the SG_PRINT call above (its two
// arguments and closing parenthesis) is not visible in this listing.
// Only one of the two messages below is compiled in, selected by USE_LOGCACHE.
1215 #ifdef USE_LOGCACHE
// NOTE(review): the third %i is fed sizeof(float64_t), which is a size_t; if
// SG_PRINT forwards to a printf-style formatter the specifier may not match
// on platforms where size_t is wider than int - confirm.
1216  SG_PRINT("initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",
1217  CMath::get_log_range(),CMath::get_log_accuracy(),sizeof(float64_t),
1218  CMath::get_log_range()*CMath::get_log_accuracy()*sizeof(float64_t)/(1024.0*1024.0));
1219 #else
1220  SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", CMath::get_log_range())
1221 #endif
1222  }
1223 
// Start from a clean argument/output state.
1224  reset();
1225 }
1226 
1227 CSGInterface::~CSGInterface()
1228 {
1229  delete ui_classifier;
1230  delete ui_hmm;
1231  delete ui_pluginestimate;
1232  delete ui_kernel;
1233  delete ui_preproc;
1234  delete ui_features;
1235  delete ui_labels;
1236  delete ui_math;
1237  delete ui_structure;
1238  //delete ui_signals;
1239  delete ui_time;
1240  delete ui_distance;
1241  delete ui_converter;
1242 
1243  if (file_out)
1244  fclose(file_out);
1245 }
1246 
1247 void CSGInterface::reset()
1248 {
1249  m_lhs_counter=0;
1250  m_rhs_counter=0;
1251  m_nlhs=0;
1252  m_nrhs=0;
1253  m_legacy_strptr=NULL;
1254  file_out=NULL;
1255  echo=true;
1256 }
1257 
1258 void CSGInterface::translate_arg(CSGInterface* source, CSGInterface* target)
1259 {
1260  switch (source->get_argument_type())
1261  {
1262  case SCALAR_INT:
1263  target->set_int(source->get_int());
1264  break;
1265  case SCALAR_REAL:
1266  target->set_real(source->get_real());
1267  break;
1268  case SCALAR_BOOL:
1269  target->set_bool(source->get_bool());
1270  break;
1271  case VECTOR_BOOL:
1272  {
1273  bool* v=NULL;
1274  int32_t len=0;
1275  source->get_vector(v, len);
1276  target->set_vector(v, len);
1277  SG_FREE(v);
1278  break;
1279  }
1280  case VECTOR_BYTE:
1281  {
1282  uint8_t* v=NULL;
1283  int32_t len=0;
1284  source->get_vector(v, len);
1285  target->set_vector(v, len);
1286  SG_FREE(v);
1287  break;
1288  }
1289  case VECTOR_CHAR:
1290  {
1291  char* v=NULL;
1292  int32_t len=0;
1293  source->get_vector(v, len);
1294  target->set_vector(v, len);
1295  SG_FREE(v);
1296  break;
1297  }
1298  case VECTOR_INT:
1299  {
1300  int32_t* v=NULL;
1301  int32_t len=0;
1302  source->get_vector(v, len);
1303  target->set_vector(v, len);
1304  SG_FREE(v);
1305  break;
1306  }
1307  case VECTOR_REAL:
1308  {
1309  float64_t* v=NULL;
1310  int32_t len=0;
1311  source->get_vector(v, len);
1312  target->set_vector(v, len);
1313  SG_FREE(v);
1314  break;
1315  }
1316  case VECTOR_SHORTREAL:
1317  {
1318  float32_t* v=NULL;
1319  int32_t len=0;
1320  source->get_vector(v, len);
1321  target->set_vector(v, len);
1322  SG_FREE(v);
1323  break;
1324  }
1325  case VECTOR_SHORT:
1326  {
1327  int16_t* v=NULL;
1328  int32_t len=0;
1329  source->get_vector(v, len);
1330  target->set_vector(v, len);
1331  SG_FREE(v);
1332  break;
1333  }
1334  case VECTOR_WORD:
1335  {
1336  uint16_t* v=NULL;
1337  int32_t len=0;
1338  source->get_vector(v, len);
1339  target->set_vector(v, len);
1340  SG_FREE(v);
1341  break;
1342  }
1343 
1344  case STRING_BYTE:
1345  {
1346  int32_t num_str=0;
1347  int32_t max_str_len=0;
1348  SGString<uint8_t>* strs=NULL;
1349  source->get_string_list(strs, num_str, max_str_len);
1350  target->set_string_list(strs, num_str);
1351  SG_FREE(strs);
1352  break;
1353  }
1354  case STRING_CHAR:
1355  {
1356  int32_t num_str=0;
1357  int32_t max_str_len=0;
1358  SGString<char>* strs;
1359  source->get_string_list(strs, num_str,max_str_len);
1360  target->set_string_list(strs, num_str);
1361  SG_FREE(strs);
1362  break;
1363  }
1364  case STRING_INT:
1365  {
1366  int32_t num_str=0;
1367  int32_t max_str_len=0;
1368  SGString<int32_t>* strs;
1369  source->get_string_list(strs, num_str,max_str_len);
1370  target->set_string_list(strs, num_str);
1371  SG_FREE(strs);
1372  break;
1373  }
1374  case STRING_SHORT:
1375  {
1376  int32_t num_str=0;
1377  int32_t max_str_len=0;
1378  SGString<int16_t>* strs=NULL;
1379  source->get_string_list(strs, num_str, max_str_len);
1380  target->set_string_list(strs, num_str);
1381  SG_FREE(strs);
1382  break;
1383  }
1384  case STRING_WORD:
1385  {
1386  int32_t num_str=0;
1387  int32_t max_str_len=0;
1388  SGString<uint16_t>* strs=NULL;
1389  source->get_string_list(strs, num_str, max_str_len);
1390  target->set_string_list(strs, num_str);
1391  SG_FREE(strs);
1392  break;
1393  }
1394  case DENSE_INT:
1395  {
1396  int32_t num_feat=0;
1397  int32_t num_vec=0;
1398  int32_t* fmatrix=NULL;
1399  source->get_matrix(fmatrix, num_feat, num_vec);
1400  target->set_matrix(fmatrix, num_feat, num_vec);
1401  SG_FREE(fmatrix);
1402  break;
1403  }
1404  case DENSE_REAL:
1405  {
1406  int32_t num_feat=0;
1407  int32_t num_vec=0;
1408  float64_t* fmatrix=NULL;
1409  source->get_matrix(fmatrix, num_feat, num_vec);
1410  target->set_matrix(fmatrix, num_feat, num_vec);
1411  SG_FREE(fmatrix);
1412  break;
1413  }
1414  case DENSE_SHORT:
1415  {
1416  int32_t num_feat=0;
1417  int32_t num_vec=0;
1418  int16_t* fmatrix=NULL;
1419  source->get_matrix(fmatrix, num_feat, num_vec);
1420  target->set_matrix(fmatrix, num_feat, num_vec);
1421  SG_FREE(fmatrix);
1422  break;
1423  }
1424  case DENSE_SHORTREAL:
1425  {
1426  int32_t num_feat=0;
1427  int32_t num_vec=0;
1428  float32_t* fmatrix=NULL;
1429  source->get_matrix(fmatrix, num_feat, num_vec);
1430  target->set_matrix(fmatrix, num_feat, num_vec);
1431  SG_FREE(fmatrix);
1432  break;
1433  }
1434  case DENSE_WORD:
1435  {
1436  int32_t num_feat=0;
1437  int32_t num_vec=0;
1438  uint16_t* fmatrix=NULL;
1439  source->get_matrix(fmatrix, num_feat, num_vec);
1440  target->set_matrix(fmatrix, num_feat, num_vec);
1441  SG_FREE(fmatrix);
1442  break;
1443  }
1444  /*
1445  case NDARRAY_BYTE:
1446  {
1447  uint8_t* a=NULL;
1448  int32_t* dims=NULL;
1449  int32_t num_dims=0;
1450  source->get_ndarray(a, dims, num_dims);
1451  target->set_ndarray(a, dims, num_dims);
1452  SG_FREE(a);
1453  SG_FREE(dims);
1454  break;
1455  }
1456  case NDARRAY_CHAR:
1457  {
1458  char* a=NULL;
1459  int32_t* dims=NULL;
1460  int32_t num_dims=0;
1461  source->get_ndarray(a, dims, num_dims);
1462  target->set_ndarray(a, dims, num_dims);
1463  SG_FREE(a);
1464  SG_FREE(dims);
1465  break;
1466  }
1467  case NDARRAY_INT:
1468  {
1469  int32_t* a=NULL;
1470  int32_t* dims=NULL;
1471  int32_t num_dims=0;
1472  source->get_ndarray(a, dims, num_dims);
1473  target->set_ndarray(a, dims, num_dims);
1474  SG_FREE(a);
1475  SG_FREE(dims);
1476  break;
1477  }
1478  case NDARRAY_REAL:
1479  {
1480  float64_t* a=NULL;
1481  int32_t* dims=NULL;
1482  int32_t num_dims=0;
1483  source->get_ndarray(a, dims, num_dims);
1484  target->set_ndarray(a, dims, num_dims);
1485  SG_FREE(a);
1486  SG_FREE(dims);
1487  break;
1488  }
1489  case NDARRAY_SHORTREAL:
1490  {
1491  float32_t* a=NULL;
1492  int32_t* dims=NULL;
1493  int32_t num_dims=0;
1494  source->get_ndarray(a, dims, num_dims);
1495  target->set_ndarray(a, dims, num_dims);
1496  SG_FREE(a);
1497  SG_FREE(dims);
1498  break;
1499  }
1500  case NDARRAY_SHORT:
1501  {
1502  int16_t* a=NULL;
1503  int32_t* dims=NULL;
1504  int32_t num_dims=0;
1505  source->get_ndarray(a, dims, num_dims);
1506  target->set_ndarray(a, dims, num_dims);
1507  SG_FREE(a);
1508  SG_FREE(dims);
1509  break;
1510  }
1511  case NDARRAY_WORD:
1512  {
1513  uint16_t* a=NULL;
1514  int32_t* dims=NULL;
1515  int32_t num_dims=0;
1516  source->get_ndarray(a, dims, num_dims);
1517  target->set_ndarray(a, dims, num_dims);
1518  SG_FREE(a);
1519  SG_FREE(dims);
1520  break;
1521  }*/
1522  case SPARSE_REAL:
1523  {
1524  int32_t num_feat=0;
1525  int32_t num_vec=0;
1526  SGSparseVector<float64_t>* fmatrix=NULL;
1527  source->get_sparse_matrix(fmatrix, num_feat, num_vec);
1528  int64_t nnz=0;
1529  for (int32_t i=0; i<num_vec; i++)
1530  nnz+=fmatrix[i].num_feat_entries;
1531  target->set_sparse_matrix(fmatrix, num_feat, num_vec, nnz);
1532  SG_FREE(fmatrix);
1533  break;
1534  }
1535 
1536  default:
1537  SG_ERROR("unknown return type")
1538  break;
1539  }
1540 }
1541 
1543 // commands
1545 
1546 /* Features */
1547 
1548 bool CSGInterface::cmd_load_features()
1549 {
1550  if (m_nrhs<8 || !create_return_values(0))
1551  return false;
1552 
1553  int32_t len=0;
1554  char* filename=get_str_from_str_or_direct(len);
1555  char* fclass=get_str_from_str_or_direct(len);
1556  char* type=get_str_from_str_or_direct(len);
1557  char* target=get_str_from_str_or_direct(len);
1558  int32_t size=get_int_from_int_or_str();
1559  int32_t comp_features=get_int_from_int_or_str();
1560 
1561  bool success=ui_features->load(
1562  filename, fclass, type, target, size, comp_features);
1563 
1564  SG_FREE(filename);
1565  SG_FREE(fclass);
1566  SG_FREE(type);
1567  SG_FREE(target);
1568  return success;
1569 }
1570 
1571 bool CSGInterface::cmd_save_features()
1572 {
1573  if (m_nrhs<5 || !create_return_values(0))
1574  return false;
1575 
1576  int32_t len=0;
1577  char* filename=get_str_from_str_or_direct(len);
1578  char* type=get_str_from_str_or_direct(len);
1579  char* target=get_str_from_str_or_direct(len);
1580 
1581  bool success=ui_features->save(filename, type, target);
1582 
1583  SG_FREE(filename);
1584  SG_FREE(type);
1585  SG_FREE(target);
1586  return success;
1587 }
1588 
1589 bool CSGInterface::cmd_clean_features()
1590 {
1591  if (m_nrhs<2 || !create_return_values(0))
1592  return false;
1593 
1594  int32_t len=0;
1595  char* target=get_str_from_str_or_direct(len);
1596 
1597  bool success=ui_features->clean(target);
1598 
1599  SG_FREE(target);
1600  return success;
1601 }
1602 
1603 bool CSGInterface::cmd_get_features()
1604 {
1605  if (m_nrhs!=2 || !create_return_values(1))
1606  return false;
1607 
1608  int32_t tlen=0;
1609  char* target=get_string(tlen);
1610  CFeatures* feat=NULL;
1611 
1612  if (strmatch(target, "TRAIN"))
1613  feat=ui_features->get_train_features();
1614  else if (strmatch(target, "TEST"))
1615  feat=ui_features->get_test_features();
1616  else
1617  {
1618  SG_FREE(target);
1619  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
1620  }
1621  SG_FREE(target);
1622 
1623  ASSERT(feat)
1624 
1625  switch (feat->get_feature_class())
1626  {
1627  case C_DENSE:
1628  {
1629  int32_t num_feat=0;
1630  int32_t num_vec=0;
1631 
1632  switch (feat->get_feature_type())
1633  {
1634  case F_BYTE:
1635  {
1636  uint8_t* fmatrix=((CDenseFeatures<uint8_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1637  set_matrix(fmatrix, num_feat, num_vec);
1638  break;
1639  }
1640 
1641  case F_CHAR:
1642  {
1643  char* fmatrix=((CDenseFeatures<char> *) feat)->get_feature_matrix(num_feat, num_vec);
1644  set_matrix(fmatrix, num_feat, num_vec);
1645  break;
1646  }
1647 
1648  case F_DREAL:
1649  {
1650  float64_t* fmatrix=((CDenseFeatures<float64_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1651  set_matrix(fmatrix, num_feat, num_vec);
1652  break;
1653  }
1654 
1655  case F_INT:
1656  {
1657  int32_t* fmatrix=((CDenseFeatures<int32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1658  set_matrix(fmatrix, num_feat, num_vec);
1659  break;
1660  }
1661 
1662  case F_SHORT:
1663  {
1664  int16_t* fmatrix=((CDenseFeatures<int16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1665  set_matrix(fmatrix, num_feat, num_vec);
1666  break;
1667  }
1668 
1669  case F_SHORTREAL:
1670  {
1671  float32_t* fmatrix=((CDenseFeatures<float32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1672  set_matrix(fmatrix, num_feat, num_vec);
1673  break;
1674  }
1675 
1676  case F_WORD:
1677  {
1678  uint16_t* fmatrix=((CDenseFeatures<uint16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1679  set_matrix(fmatrix, num_feat, num_vec);
1680  break;
1681  }
1682 
1683  default:
1685  }
1686  break;
1687  }
1688 
1689  case C_SPARSE:
1690  {
1691  switch (feat->get_feature_type())
1692  {
1693  case F_DREAL:
1694  {
1695  int64_t nnz=((CSparseFeatures<float64_t>*) feat)->
1696  get_num_nonzero_entries();
1697  SGSparseMatrix<float64_t> fmatrix = ((CSparseFeatures<float64_t>*) feat)->get_sparse_feature_matrix();
1698  SG_INFO("sparse matrix has %d feats, %d vecs and %d nnz elemements\n", fmatrix.num_features, fmatrix.num_vectors, nnz)
1699 
1700  set_sparse_matrix(fmatrix.sparse_matrix, fmatrix.num_features, fmatrix.num_vectors, nnz);
1701  break;
1702  }
1703 
1704  default:
1706  }
1707  break;
1708  }
1709 
1710  case C_STRING:
1711  {
1712  int32_t num_str=0;
1713  int32_t max_str_len=0;
1714  switch (feat->get_feature_type())
1715  {
1716  case F_BYTE:
1717  {
1718  SGString<uint8_t>* fmatrix=((CStringFeatures<uint8_t>*) feat)->get_features(num_str, max_str_len);
1719  set_string_list(fmatrix, num_str);
1720  break;
1721  }
1722 
1723  case F_CHAR:
1724  {
1725  SGString<char>* fmatrix=((CStringFeatures<char>*) feat)->get_features(num_str, max_str_len);
1726  set_string_list(fmatrix, num_str);
1727  break;
1728  }
1729 
1730  case F_WORD:
1731  {
1732  SGString<uint16_t>* fmatrix=((CStringFeatures<uint16_t>*) feat)->get_features(num_str, max_str_len);
1733  set_string_list(fmatrix, num_str);
1734  break;
1735  }
1736 
1737  default:
1739  }
1740  break;
1741  }
1742 
1743  case C_WD:
1744  case C_WEIGHTEDSPEC:
1745  case C_SPEC:
1746  case C_COMBINED_DOT:
1747  case C_POLY:
1748  {
1749 
1750  SGMatrix<float64_t> fmatrix = ((CDotFeatures*) feat)->get_computed_dot_feature_matrix();
1751  set_matrix(fmatrix.matrix, fmatrix.num_cols, fmatrix.num_rows);
1752  break;
1753  }
1754 
1755  default:
1757  }
1758 
1759  return true;
1760 }
1761 
1762 bool CSGInterface::cmd_add_features()
1763 {
1764  if (m_nrhs<3 || !create_return_values(0))
1765  return false;
1766 
1767  return do_set_features(true, false);
1768 }
1769 
1770 bool CSGInterface::cmd_add_multiple_features()
1771 {
1772  if ((m_nrhs!=4 && m_nrhs<5) || !create_return_values(0))
1773  return false;
1774 
1775  int32_t repetitions=get_int();
1776 
1777  ASSERT(repetitions>=1)
1778 
1779  return do_set_features(true, false, repetitions);
1780 }
1781 
1782 bool CSGInterface::cmd_add_dotfeatures()
1783 {
1784  if (m_nrhs<3 || !create_return_values(0))
1785  return false;
1786 
1787  return do_set_features(true, true);
1788 }
1789 
1790 bool CSGInterface::cmd_set_features()
1791 {
1792  if (m_nrhs<3 || !create_return_values(0))
1793  return false;
1794 
1795  return do_set_features(false, false);
1796 }
1797 
1798 bool CSGInterface::do_set_features(bool add, bool check_dot, int32_t repetitions)
1799 {
1800  int32_t tlen=0;
1801  char* target=get_string(tlen);
1802  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
1803  {
1804  SG_FREE(target);
1805  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
1806  }
1807 
1808  CFeatures* feat=NULL;
1809  int32_t num_feat=0;
1810  int32_t num_vec=0;
1811 
1812  switch (get_argument_type())
1813  {
1814  case SPARSE_REAL:
1815  {
1816  SGSparseVector<float64_t>* fmatrix=NULL;
1817  get_sparse_matrix(fmatrix, num_feat, num_vec);
1818 
1819  feat=new CSparseFeatures<float64_t>(SGSparseMatrix<float64_t>(fmatrix, num_feat, num_vec));
1820  break;
1821  }
1822 
1823  case DENSE_REAL:
1824  {
1825  float64_t* fmatrix=NULL;
1826  get_matrix(fmatrix, num_feat, num_vec);
1827 
1828  feat=new CDenseFeatures<float64_t>(0);
1829  ((CDenseFeatures<float64_t>*) feat)->
1830  set_feature_matrix(SGMatrix<float64_t>(fmatrix, num_feat, num_vec));
1831 
1832  if (m_nrhs==6)
1833  feat = create_custom_real_features((CDenseFeatures<float64_t>*) feat);
1834 
1835  break;
1836  }
1837 
1838  case DENSE_INT:
1839  {
1840  int32_t* fmatrix=NULL;
1841  get_matrix(fmatrix, num_feat, num_vec);
1842 
1843  feat=new CDenseFeatures<int32_t>(0);
1844  ((CDenseFeatures<int32_t>*) feat)->
1845  set_feature_matrix(SGMatrix<int32_t>(fmatrix, num_feat, num_vec));
1846  break;
1847  }
1848 
1849  case DENSE_SHORT:
1850  {
1851  int16_t* fmatrix=NULL;
1852  get_matrix(fmatrix, num_feat, num_vec);
1853 
1854  feat=new CDenseFeatures<int16_t>(0);
1855  ((CDenseFeatures<int16_t>*) feat)->
1856  set_feature_matrix(SGMatrix<int16_t>(fmatrix, num_feat, num_vec));
1857  break;
1858  }
1859 
1860  case DENSE_WORD:
1861  {
1862  uint16_t* fmatrix=NULL;
1863  get_matrix(fmatrix, num_feat, num_vec);
1864 
1865  feat=new CDenseFeatures<uint16_t>(0);
1866  ((CDenseFeatures<uint16_t>*) feat)->
1867  set_feature_matrix(SGMatrix<uint16_t>(fmatrix, num_feat, num_vec));
1868  break;
1869  }
1870 
1871  case DENSE_SHORTREAL:
1872  {
1873  float32_t* fmatrix=NULL;
1874  get_matrix(fmatrix, num_feat, num_vec);
1875 
1876  feat=new CDenseFeatures<float32_t>(0);
1877  ((CDenseFeatures<float32_t>*) feat)->
1878  set_feature_matrix(SGMatrix<float32_t>(fmatrix, num_feat, num_vec));
1879  break;
1880  }
1881 
1882  case STRING_CHAR:
1883  {
1884  if (m_nrhs<4)
1885  SG_ERROR("Please specify alphabet!\n")
1886 
1887  int32_t num_str=0;
1888  int32_t max_str_len=0;
1889  SGString<char>* fmatrix=NULL;
1890  get_string_list(fmatrix, num_str, max_str_len);
1891 
1892  int32_t alphabet_len=0;
1893  char* alphabet_str=get_string(alphabet_len);
1894  ASSERT(alphabet_str)
1895 
1896  if (strmatch(alphabet_str, "DNABINFILE"))
1897  {
1898  SG_FREE(alphabet_str);
1899 
1900  ASSERT(fmatrix[0].string)
1901  feat=new CStringFeatures<uint8_t>(DNA);
1902 
1903  try
1904  {
1905  ((CStringFeatures<uint8_t>*) feat)->load_ascii_file(fmatrix[0].string);
1906  }
1907  catch (...)
1908  {
1909  SG_UNREF(feat);
1910  SG_ERROR("Couldn't load DNA features from file.\n")
1911  }
1912  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1913  break;
1914  }
1915  else
1916  {
1917  bool convert_to_word=false;
1918  bool convert_to_ulong=false;
1919  CAlphabet* alphabet=NULL;
1920  if (strmatch(alphabet_str, "DNAWORD"))
1921  {
1922  alphabet=new CAlphabet(DNA);
1923  convert_to_word=true;
1924  }
1925  else if (strmatch(alphabet_str, "DNAULONG"))
1926  {
1927  alphabet=new CAlphabet(DNA);
1928  convert_to_ulong=true;
1929  }
1930  else
1931  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1932 
1933  SG_REF(alphabet);
1934  SG_FREE(alphabet_str);
1935 
1936  feat=new CStringFeatures<char>(alphabet);
1937 
1938  if (!((CStringFeatures<char>*) feat)->set_features(fmatrix, num_str, max_str_len))
1939  {
1940  SG_UNREF(alphabet);
1941  SG_UNREF(feat);
1942  SG_ERROR("Couldnt set byte string features.\n")
1943  }
1944 
1945  SG_UNREF(alphabet);
1946 
1947  if (convert_to_word || convert_to_ulong)
1948  convert_to_bitembedding(feat, convert_to_word, convert_to_ulong);
1949  }
1950 
1951  obtain_from_single_string(feat);
1952  break;
1953  }
1954 
1955  case STRING_BYTE:
1956  {
1957  if (m_nrhs<4)
1958  SG_ERROR("Please specify alphabet!\n")
1959 
1960  int32_t num_str=0;
1961  int32_t max_str_len=0;
1962  SGString<uint8_t>* fmatrix=NULL;
1963  get_string_list(fmatrix, num_str, max_str_len);
1964 
1965  int32_t alphabet_len=0;
1966  char* alphabet_str=get_string(alphabet_len);
1967  ASSERT(alphabet_str)
1968  CAlphabet* alphabet=NULL;
1969  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1970  SG_FREE(alphabet_str);
1971 
1972  feat=new CStringFeatures<uint8_t>(alphabet);
1973  if (!((CStringFeatures<uint8_t>*) feat)->set_features(fmatrix, num_str, max_str_len))
1974  {
1975  SG_UNREF(alphabet);
1976  SG_UNREF(feat);
1977  SG_ERROR("Couldnt set byte string features.\n")
1978  }
1979  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1980  break;
1981  }
1982 
1983  default:
1984  SG_ERROR("Wrong argument type %d.\n", get_argument_type())
1985  }
1986 
1987  if (check_dot && !feat->has_property(FP_DOT))
1988  {
1989  SG_UNREF(feat);
1990  SG_ERROR("Feature type not supported by DOT Features\n")
1991  }
1992 
1993  if (strmatch(target, "TRAIN"))
1994  {
1995  if (!add)
1996  ui_features->set_train_features(feat);
1997  else if (check_dot)
1998  {
1999  for (int32_t i=0; i<repetitions; i++)
2000  ui_features->add_train_dotfeatures((CDotFeatures*) feat);
2001  }
2002  else
2003  {
2004  for (int32_t i=0; i<repetitions; i++)
2005  ui_features->add_train_features(feat);
2006  }
2007  }
2008  else
2009  {
2010  if (!add)
2011  ui_features->set_test_features(feat);
2012  else if (check_dot)
2013  {
2014  for (int32_t i=0; i<repetitions; i++)
2015  ui_features->add_test_dotfeatures((CDotFeatures*) feat);
2016  }
2017  else
2018  {
2019  for (int32_t i=0; i<repetitions; i++)
2020  ui_features->add_test_features(feat);
2021  }
2022  }
2023 
2024  SG_FREE(target);
2025 
2026  return true;
2027 }
2028 
2029 bool CSGInterface::cmd_set_reference_features()
2030 {
2031  if (m_nrhs<3 || !create_return_values(0))
2032  return false;
2033 
2034  int32_t len=0;
2035  char* target=get_str_from_str_or_direct(len);
2036 
2037  bool success=ui_features->set_reference_features(target);
2038 
2039  SG_FREE(target);
2040  return success;
2041 }
2042 
2043 bool CSGInterface::cmd_del_last_features()
2044 {
2045  if (m_nrhs<2 || !create_return_values(0))
2046  return false;
2047 
2048  int32_t len=0;
2049  char* target=get_str_from_str_or_direct(len);
2050  bool success=ui_features->del_last_feature_obj(target);
2051 
2052  SG_FREE(target);
2053  return success;
2054 }
2055 
2056 bool CSGInterface::cmd_convert()
2057 {
2058  if (m_nrhs<5 || !create_return_values(0))
2059  return false;
2060 
2061  int32_t len=0;
2062  char* target=get_str_from_str_or_direct(len);
2063  CFeatures* features=ui_features->get_convert_features(target);
2064  if (!features)
2065  {
2066  SG_FREE(target);
2067  SG_ERROR("No \"%s\" features available.\n", target)
2068  }
2069 
2070  char* from_class=get_str_from_str_or_direct(len);
2071  char* from_type=get_str_from_str_or_direct(len);
2072  char* to_class=get_str_from_str_or_direct(len);
2073  char* to_type=get_str_from_str_or_direct(len);
2074 
2075  CFeatures* result=NULL;
2076  if (strmatch(from_class, "SIMPLE"))
2077  {
2078  if (strmatch(from_type, "REAL"))
2079  {
2080  if (strmatch(to_class, "SPARSE") &&
2081  strmatch(to_type, "REAL"))
2082  {
2083  result=ui_features->convert_simple_real_to_sparse_real(
2084  ((CDenseFeatures<float64_t>*) features));
2085  }
2086  else
2088  } // from_type REAL
2089 
2090  else if (strmatch(from_type, "CHAR"))
2091  {
2092  if (strmatch(to_class, "STRING") &&
2093  strmatch(to_type, "CHAR"))
2094  {
2095  result=ui_features->convert_simple_char_to_string_char(
2096  ((CDenseFeatures<char>*) features));
2097  }
2098  else if (strmatch(to_class, "SIMPLE"))
2099  {
2100  if (strmatch(to_type, "ALIGN") && m_nrhs==8)
2101  {
2102  float64_t gap_cost=get_real_from_real_or_str();
2103  result=ui_features->convert_simple_char_to_simple_align(
2104  (CDenseFeatures<char>*) features, gap_cost);
2105  }
2106  else
2108  }
2109  else
2111  } // from_type CHAR
2112 
2113  else if (strmatch(from_type, "WORD"))
2114  {
2115  if (strmatch(to_class, "SIMPLE") &&
2116  strmatch(to_type, "SALZBERG"))
2117  {
2118  result=ui_features->convert_simple_word_to_simple_salzberg(
2119  (CDenseFeatures<uint16_t>*) features);
2120  }
2121  else
2123  } // from_type WORD
2124 
2125  else
2127  } // from_class SIMPLE
2128 
2129  else if (strmatch(from_class, "SPARSE"))
2130  {
2131  if (strmatch(from_type, "REAL"))
2132  {
2133  if (strmatch(to_class, "SIMPLE") &&
2134  strmatch(to_type, "REAL"))
2135  {
2136  result=ui_features->convert_sparse_real_to_simple_real(
2137  (CSparseFeatures<float64_t>*) features);
2138  }
2139  else
2141  } // from_type REAL
2142  else
2144  } // from_class SPARSE
2145 
2146  else if (strmatch(from_class, "STRING"))
2147  {
2148  if (strmatch(from_type, "CHAR"))
2149  {
2150  if (strmatch(to_class, "STRING"))
2151  {
2152  int32_t order=1;
2153  int32_t start=0;
2154  int32_t gap=0;
2155  char rev='f';
2156 
2157  if (m_nrhs>6)
2158  {
2159  order=get_int_from_int_or_str();
2160 
2161  if (m_nrhs>7)
2162  {
2163  start=get_int_from_int_or_str();
2164 
2165  if (m_nrhs>8)
2166  {
2167  gap=get_int_from_int_or_str();
2168 
2169  if (m_nrhs>9)
2170  {
2171  char* rev_str=get_str_from_str_or_direct(len);
2172  if (rev_str)
2173  rev=rev_str[0];
2174 
2175  SG_FREE(rev_str);
2176  }
2177  }
2178  }
2179  }
2180 
2181  if (strmatch(to_type, "BYTE"))
2182  {
2183  result=ui_features->convert_string_char_to_string_generic<char,uint8_t>(
2184  (CStringFeatures<char>*) features, order, start,
2185  gap, rev);
2186  }
2187  else if (strmatch(to_type, "WORD"))
2188  {
2189  result=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2190  (CStringFeatures<char>*) features, order, start,
2191  gap, rev);
2192  }
2193  else if (strmatch(to_type, "ULONG"))
2194  {
2195  result=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2196  (CStringFeatures<char>*) features, order, start,
2197  gap, rev);
2198  }
2199  else
2201  }
2202  else
2204  } // from_type CHAR
2205 
2206  else if (strmatch(from_type, "BYTE"))
2207  {
2208  if (strmatch(to_class, "STRING"))
2209  {
2210  int32_t order=1;
2211  int32_t start=0;
2212  int32_t gap=0;
2213  char rev='f';
2214 
2215  if (m_nrhs>6)
2216  {
2217  order=get_int_from_int_or_str();
2218 
2219  if (m_nrhs>7)
2220  {
2221  start=get_int_from_int_or_str();
2222 
2223  if (m_nrhs>8)
2224  {
2225  gap=get_int_from_int_or_str();
2226 
2227  if (m_nrhs>9)
2228  {
2229  char* rev_str=get_str_from_str_or_direct(len);
2230  if (rev_str)
2231  rev=rev_str[0];
2232 
2233  SG_FREE(rev_str);
2234  }
2235  }
2236  }
2237  }
2238 
2239  if (strmatch(to_type, "WORD"))
2240  {
2241  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint16_t>(
2242  (CStringFeatures<uint8_t>*) features, order, start,
2243  gap, rev);
2244  }
2245  else if (strmatch(to_type, "ULONG"))
2246  {
2247  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint64_t>(
2248  (CStringFeatures<uint8_t>*) features, order, start,
2249  gap, rev);
2250  }
2251  else
2253  }
2254  else
2256  } // from_type uint8_t
2257 
2258  else if (strmatch(from_type, "WORD"))
2259  {
2260  if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "TOP"))
2261  {
2262  result=ui_features->convert_string_word_to_simple_top(
2263  (CStringFeatures<uint16_t>*) features);
2264  }
2265  else if (strmatch(to_class, "SPEC") && strmatch(to_type, "WORD") && m_nrhs==7)
2266  {
2267  bool use_norm=get_bool();
2268  result=ui_features->convert_string_byte_to_spec_word((CStringFeatures<uint16_t>*) features, use_norm);
2269 
2270  }
2271  else
2273  } // from_type WORD
2274 
2275  else if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "FK"))
2276  {
2277  result=ui_features->convert_string_word_to_simple_fk(
2278  (CStringFeatures<uint16_t>*) features);
2279  } // to_type FK
2280 
2281  else
2283 
2284  } // from_class STRING
2285 
2286  if (result && ui_features->set_convert_features(result, target))
2287  SG_INFO("Conversion was successful.\n")
2288  else
2289  SG_ERROR("Conversion failed.\n")
2290 
2291  SG_FREE(target);
2292  SG_FREE(from_class);
2293  SG_FREE(from_type);
2294  SG_FREE(to_class);
2295  SG_FREE(to_type);
2296  return (result!=NULL);
2297 }
2298 
2299 void CSGInterface::convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong)
2300 {
2301  int32_t order=1;
2302  int32_t start=0;
2303  int32_t gap=0;
2304  char rev='f';
2305 
2306  if (m_nrhs<5)
2307  return;
2308 
2309  order=get_int();
2310  // remove arg, for parameters to come
2311  m_nrhs--;
2312 
2313  if (convert_to_word)
2314  {
2315  SG_INFO("Converting into word-bitembedding\n")
2316  features=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2317  (CStringFeatures<char>*) features, order, start, gap, rev);
2318  }
2319 
2320  if (convert_to_ulong)
2321  {
2322  SG_INFO("Converting into ulong-bitembedding\n")
2323  features=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2324  (CStringFeatures<char>*) features, order, start, gap, rev);
2325  }
2326 }
2327 
2328 void CSGInterface::obtain_from_single_string(CFeatures* features)
2329 {
2330  if (m_nrhs<5)
2331  return;
2332 
2333  int32_t len=0;
2334  char* str=get_string(len);
2335  ASSERT(str)
2336 
2337  if (strmatch(str, "from_position_list"))
2338  {
2339  obtain_from_position_list(features);
2340  }
2341  else if (strmatch(str, "slide_window"))
2342  {
2343  obtain_by_sliding_window(features);
2344  }
2345  else
2346  SG_SERROR("Unknown conversion\n")
2347 }
2348 
2349 bool CSGInterface::obtain_from_position_list(CFeatures* features)
2350 {
2351  int32_t winsize=get_int();
2352 
2353  int32_t* shifts=NULL;
2354  int32_t num_shift=0;
2355  get_vector(shifts, num_shift);
2356 
2357  int32_t skip=0;
2358  if (m_nrhs==8)
2359  skip=get_int();
2360 
2361  SG_DEBUG("winsize: %d num_shifts: %d skip: %d\n", winsize, num_shift, skip)
2362 
2363  CDynamicArray<int32_t> positions(num_shift+1);
2364 
2365  for (int32_t i=0; i<num_shift; i++)
2366  positions.set_element(shifts[i], i);
2367 
2368  if (features->get_feature_class()!=C_STRING)
2369  SG_ERROR("No string features.\n")
2370 
2371  bool success=false;
2372  switch (features->get_feature_type())
2373  {
2374  case F_CHAR:
2375  {
2376  success=(((CStringFeatures<char>*) features)->
2377  obtain_by_position_list(winsize, &positions, skip)>0);
2378  break;
2379  }
2380  case F_BYTE:
2381  {
2382  success=(((CStringFeatures<uint8_t>*) features)->
2383  obtain_by_position_list(winsize, &positions, skip)>0);
2384  break;
2385  }
2386  case F_WORD:
2387  {
2388  success=(((CStringFeatures<uint16_t>*) features)->
2389  obtain_by_position_list(winsize, &positions, skip)>0);
2390  break;
2391  }
2392  case F_ULONG:
2393  {
2394  success=(((CStringFeatures<uint64_t>*) features)->
2395  obtain_by_position_list(winsize, &positions, skip)>0);
2396  break;
2397  }
2398  default:
2399  SG_ERROR("Unsupported string features type.\n")
2400  }
2401 
2402  return success;
2403 }
2404 
2405 bool CSGInterface::obtain_by_sliding_window(CFeatures* features)
2406 {
2407  int32_t winsize=get_int();
2408  int32_t shift=get_int();
2409  int32_t skip=0;
2410 
2411  if (m_nrhs==8)
2412  skip=get_int();
2413 
2414  bool success=false;
2415 
2416  ASSERT(features)
2417  ASSERT(((CFeatures*) features)->get_feature_class()==C_STRING)
2418 
2419  switch (features->get_feature_type())
2420  {
2421  case F_CHAR:
2422  return ( ((CStringFeatures<char>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2423  case F_BYTE:
2424  return ( ((CStringFeatures<uint8_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2425  case F_WORD:
2426  return ( ((CStringFeatures<uint16_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2427  case F_ULONG:
2428  return ( ((CStringFeatures<uint64_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2429  default:
2430  SG_SERROR("Unsupported string features type.\n")
2431  return false;
2432  }
2433 
2434  return success;
2435 }
2436 
2437 bool CSGInterface::cmd_reshape()
2438 {
2439  if (m_nrhs<4 || !create_return_values(0))
2440  return false;
2441 
2442  int32_t len=0;
2443  char* target=get_str_from_str_or_direct(len);
2444  int32_t num_feat=get_int_from_int_or_str();
2445  int32_t num_vec=get_int_from_int_or_str();
2446 
2447  bool success=ui_features->reshape(target, num_feat, num_vec);
2448 
2449  SG_FREE(target);
2450  return success;
2451 }
2452 
2453 bool CSGInterface::cmd_load_labels()
2454 {
2455  if (m_nrhs<4 || !create_return_values(0))
2456  return false;
2457 
2458  int32_t len=0;
2459  char* filename=get_str_from_str_or_direct(len);
2460  char* target=get_str_from_str_or_direct(len);
2461 
2462  bool success=ui_labels->load(filename, target);
2463 
2464  SG_FREE(filename);
2465  SG_FREE(target);
2466  return success;
2467 }
2468 
2469 bool CSGInterface::cmd_set_labels()
2470 {
2471  if (m_nrhs!=3 || !create_return_values(0))
2472  return false;
2473 
2474  int32_t tlen=0;
2475  char* target=get_string(tlen);
2476  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
2477  {
2478  SG_FREE(target);
2479  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2480  }
2481 
2482  float64_t* lab=NULL;
2483  int32_t len=0;
2484  get_vector(lab, len);
2485 
2486  CLabels* labels=ui_labels->infer_labels(lab, len);
2487 
2488  SG_INFO("num labels: %d\n", labels->get_num_labels())
2489 
2490  if (strmatch(target, "TRAIN"))
2491  ui_labels->set_train_labels(labels);
2492  else if (strmatch(target, "TEST"))
2493  ui_labels->set_test_labels(labels);
2494  else
2495  {
2496  SG_FREE(target);
2497  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2498  }
2499  SG_FREE(target);
2500 
2501  return true;
2502 }
2503 
2504 bool CSGInterface::cmd_get_labels()
2505 {
2506  if (m_nrhs!=2 || !create_return_values(1))
2507  return false;
2508 
2509  int32_t tlen=0;
2510  char* target=get_string(tlen);
2511  CLabels* labels=NULL;
2512 
2513  if (strmatch(target, "TRAIN"))
2514  labels=ui_labels->get_train_labels();
2515  else if (strmatch(target, "TEST"))
2516  labels=ui_labels->get_test_labels();
2517  else
2518  {
2519  SG_FREE(target);
2520  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2521  }
2522  SG_FREE(target);
2523 
2524  if (!labels)
2525  SG_ERROR("No labels.\n")
2526 
2527  //FIXME
2528  SGVector<float64_t> lab=((CBinaryLabels*) labels)->get_labels();
2529 
2530  set_vector(lab.vector, lab.vlen);
2531  return true;
2532 }
2533 
2534 
2537 bool CSGInterface::cmd_set_kernel_normalization()
2538 {
2539  if (m_nrhs<2 || !create_return_values(0))
2540  return false;
2541 
2542  int32_t len=0;
2543  char* normalization=get_string(len);
2544 
2545  float64_t c=0;
2546  float64_t r=0;
2547 
2548  if (m_nrhs>=3)
2549  c=get_real();
2550  if (m_nrhs>=4)
2551  r=get_real();
2552 
2553  bool success=ui_kernel->set_normalization(normalization, c, r);
2554 
2555  SG_FREE(normalization);
2556  return success;
2557 }
2558 
2559 bool CSGInterface::cmd_set_kernel()
2560 {
2561  if (m_nrhs<2 || !create_return_values(0))
2562  return false;
2563 
2564  SG_DEBUG("SGInterface: set_kernel\n")
2565  CKernel* kernel=create_kernel();
2566  return ui_kernel->set_kernel(kernel);
2567 }
2568 
2569 bool CSGInterface::cmd_add_kernel()
2570 {
2571  if (m_nrhs<3 || !create_return_values(0))
2572  return false;
2573 
2574  float64_t weight=get_real_from_real_or_str();
2575  // adjust m_nrhs to play well with checks in create_kernel
2576  m_nrhs--;
2577  CKernel* kernel=create_kernel();
2578 
2579  SG_DEBUG("SGInterface: add_kernel\n")
2580  return ui_kernel->add_kernel(kernel, weight);
2581 }
2582 
2583 bool CSGInterface::cmd_del_last_kernel()
2584 {
2585  if (m_nrhs<1 || !create_return_values(0))
2586  return false;
2587 
2588  return ui_kernel->del_last_kernel();
2589 }
2590 
2591 CKernel* CSGInterface::create_kernel()
2592 {
2593  CKernel* kernel=NULL;
2594  int32_t len=0;
2595  char* type=get_str_from_str_or_direct(len);
2596 
2597  SG_DEBUG("set_kernel with type: %s\n", type)
2598 
2599  if (strmatch(type, "COMBINED"))
2600  {
2601  if (m_nrhs<3)
2602  return NULL;
2603 
2604  int32_t size=get_int_from_int_or_str();
2605  bool append_subkernel_weights=false;
2606  if (m_nrhs>3)
2607  append_subkernel_weights=get_bool_from_bool_or_str();
2608 
2609  kernel=ui_kernel->create_combined(size, append_subkernel_weights);
2610  }
2611  else if (strmatch(type, "DISTANCE"))
2612  {
2613  if (m_nrhs<3)
2614  return NULL;
2615 
2616  int32_t size=get_int_from_int_or_str();
2617  float64_t width=1;
2618  if (m_nrhs>3)
2619  width=get_real_from_real_or_str();
2620 
2621  kernel=ui_kernel->create_distance(size, width);
2622  }
2623  else if (strmatch(type, "WAVELET"))
2624  {
2625 
2626  if (m_nrhs<4)
2627  return NULL;
2628 
2629  char* dtype=get_str_from_str_or_direct(len);
2630  if (strmatch(dtype, "REAL"))
2631  {
2632  int32_t size=get_int_from_int_or_str();
2633  float64_t Wdilation=5.0;
2634  float64_t Wtranslation=2.0;
2635 
2636  if (m_nrhs>4)
2637  {
2638  Wdilation=get_real_from_real_or_str();
2639 
2640  if (m_nrhs>5)
2641  Wtranslation=get_real_from_real_or_str();
2642  }
2643 
2644  kernel=ui_kernel->create_sigmoid(size, Wdilation, Wtranslation);
2645  }
2646 
2647  SG_FREE(dtype);
2648  }
2649  else if (strmatch(type, "LINEAR"))
2650  {
2651  if (m_nrhs<4)
2652  return NULL;
2653  if (m_nrhs>5)
2654  return NULL;
2655 
2656  char* dtype=get_str_from_str_or_direct(len);
2657  int32_t size=get_int_from_int_or_str();
2658  float64_t scale=-1;
2659  if (m_nrhs==5)
2660  scale=get_real_from_real_or_str();
2661 
2662  if (strmatch(dtype, "BYTE"))
2663  kernel=ui_kernel->create_linearbyte(size, scale);
2664  else if (strmatch(dtype, "WORD"))
2665  kernel=ui_kernel->create_linearword(size, scale);
2666  else if (strmatch(dtype, "CHAR"))
2667  kernel=ui_kernel->create_linearstring(size, scale);
2668  else if (strmatch(dtype, "REAL"))
2669  kernel=ui_kernel->create_linear(size, scale);
2670  else if (strmatch(dtype, "SPARSEREAL"))
2671  kernel=ui_kernel->create_sparselinear(size, scale);
2672 
2673  SG_FREE(dtype);
2674  }
2675  else if (strmatch(type, "HISTOGRAM"))
2676  {
2677  if (m_nrhs<4)
2678  return NULL;
2679 
2680  char* dtype=get_str_from_str_or_direct(len);
2681  if (strmatch(dtype, "WORD"))
2682  {
2683  int32_t size=get_int_from_int_or_str();
2684  kernel=ui_kernel->create_histogramword(size);
2685  }
2686 
2687  SG_FREE(dtype);
2688  }
2689  else if (strmatch(type, "SALZBERG"))
2690  {
2691  if (m_nrhs<4)
2692  return NULL;
2693 
2694  char* dtype=get_str_from_str_or_direct(len);
2695  if (strmatch(dtype, "WORD"))
2696  {
2697  int32_t size=get_int_from_int_or_str();
2698  kernel=ui_kernel->create_salzbergword(size);
2699  }
2700 
2701  SG_FREE(dtype);
2702  }
2703  else if (strmatch(type, "POLYMATCH"))
2704  {
2705  if (m_nrhs<4)
2706  return NULL;
2707 
2708  char* dtype=get_str_from_str_or_direct(len);
2709  int32_t size=get_int_from_int_or_str();
2710  int32_t degree=3;
2711  bool inhomogene=false;
2712  bool normalize=true;
2713 
2714  if (m_nrhs>4)
2715  {
2716  degree=get_int_from_int_or_str();
2717  if (m_nrhs>5)
2718  {
2719  inhomogene=get_bool_from_bool_or_str();
2720  if (m_nrhs>6)
2721  normalize=get_bool_from_bool_or_str();
2722  }
2723  }
2724 
2725  if (strmatch(dtype, "CHAR"))
2726  {
2727  kernel=ui_kernel->create_polymatchstring(
2728  size, degree, inhomogene, normalize);
2729  }
2730  else if (strmatch(dtype, "WORD"))
2731  {
2732  kernel=ui_kernel->create_polymatchwordstring(
2733  size, degree, inhomogene, normalize);
2734  }
2735 
2736  SG_FREE(dtype);
2737  }
2738  else if (strmatch(type, "MATCH"))
2739  {
2740  if (m_nrhs<4)
2741  return NULL;
2742 
2743  char* dtype=get_str_from_str_or_direct(len);
2744  if (strmatch(dtype, "WORD"))
2745  {
2746  int32_t size=get_int_from_int_or_str();
2747  int32_t d=3;
2748  bool normalize=true;
2749 
2750  if (m_nrhs>4)
2751  d=get_int_from_int_or_str();
2752  if (m_nrhs>5)
2753  normalize=get_bool_from_bool_or_str();
2754 
2755  kernel=ui_kernel->create_matchwordstring(size, d, normalize);
2756  }
2757 
2758  SG_FREE(dtype);
2759  }
2760  else if (strmatch(type, "WEIGHTEDCOMMSTRING") || strmatch(type, "COMMSTRING"))
2761  {
2762  char* dtype=get_str_from_str_or_direct(len);
2763  int32_t size=get_int_from_int_or_str();
2764  bool use_sign=false;
2765  char* norm_str=NULL;
2766 
2767  if (m_nrhs>4)
2768  {
2769  use_sign=get_bool_from_bool_or_str();
2770 
2771  if (m_nrhs>5)
2772  norm_str=get_str_from_str_or_direct(len);
2773  }
2774 
2775  if (strmatch(dtype, "WORD"))
2776  {
2777  if (strmatch(type, "WEIGHTEDCOMMSTRING"))
2778  {
2779  kernel=ui_kernel->create_commstring(
2780  size, use_sign, norm_str, K_WEIGHTEDCOMMWORDSTRING);
2781  }
2782  else if (strmatch(type, "COMMSTRING"))
2783  {
2784  kernel=ui_kernel->create_commstring(
2785  size, use_sign, norm_str, K_COMMWORDSTRING);
2786  }
2787  }
2788  else if (strmatch(dtype, "ULONG"))
2789  {
2790  kernel=ui_kernel->create_commstring(
2791  size, use_sign, norm_str, K_COMMULONGSTRING);
2792  }
2793 
2794  SG_FREE(dtype);
2795  SG_FREE(norm_str);
2796  }
2797  else if (strmatch(type, "CHI2"))
2798  {
2799  if (m_nrhs<4)
2800  return NULL;
2801 
2802  char* dtype=get_str_from_str_or_direct(len);
2803  if (strmatch(dtype, "REAL"))
2804  {
2805  int32_t size=get_int_from_int_or_str();
2806  float64_t width=1;
2807 
2808  if (m_nrhs>4)
2809  width=get_real_from_real_or_str();
2810 
2811  kernel=ui_kernel->create_chi2(size, width);
2812  }
2813 
2814  SG_FREE(dtype);
2815  }
2816  else if (strmatch(type, "FIXEDDEGREE"))
2817  {
2818  if (m_nrhs<4)
2819  return NULL;
2820 
2821  char* dtype=get_str_from_str_or_direct(len);
2822  if (strmatch(dtype, "CHAR"))
2823  {
2824  int32_t size=get_int_from_int_or_str();
2825  int32_t d=3;
2826  if (m_nrhs>4)
2827  d=get_int_from_int_or_str();
2828 
2829  kernel=ui_kernel->create_fixeddegreestring(size, d);
2830  }
2831 
2832  SG_FREE(dtype);
2833  }
2834  else if (strmatch(type, "LOCALALIGNMENT"))
2835  {
2836  if (m_nrhs<4)
2837  return NULL;
2838 
2839  char* dtype=get_str_from_str_or_direct(len);
2840  if (strmatch(dtype, "CHAR"))
2841  {
2842  int32_t size=get_int_from_int_or_str();
2843 
2844  kernel=ui_kernel->create_localalignmentstring(size);
2845  }
2846 
2847  SG_FREE(dtype);
2848  }
2849  else if (strmatch(type, "OLIGO"))
2850  {
2851  if (m_nrhs<6)
2852  return NULL;
2853 
2854  char* dtype=get_str_from_str_or_direct(len);
2855  if (strmatch(dtype, "CHAR"))
2856  {
2857  int32_t size=get_int_from_int_or_str();
2858  int32_t k=get_int_from_int_or_str();
2859  float64_t w=get_real_from_real_or_str();
2860 
2861  kernel=ui_kernel->create_oligo(size, k, w);
2862  }
2863 
2864  SG_FREE(dtype);
2865  }
2866  else if (strmatch(type, "WEIGHTEDDEGREEPOS2") ||
2867  strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2868  {
2869  if (m_nrhs<7)
2870  return NULL;
2871 
2872  char* dtype=get_str_from_str_or_direct(len);
2873  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2874  {
2875  int32_t size=get_int_from_int_or_str();
2876  int32_t order=get_int_from_int_or_str();
2877  int32_t max_mismatch=get_int_from_int_or_str();
2878  int32_t length=get_int_from_int_or_str();
2879  int32_t* shifts=NULL;
2880  int32_t l=0;
2881  get_vector_from_int_vector_or_str(shifts, l);
2882 
2883  ASSERT(l==length)
2884 
2885  bool use_normalization=true;
2886  if (strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2887  use_normalization=false;
2888 
2889  kernel=ui_kernel->create_weighteddegreepositionstring2(
2890  size, order, max_mismatch, shifts, length,
2891  use_normalization);
2892 
2893  SG_FREE(shifts);
2894  }
2895 
2896  SG_FREE(dtype);
2897  }
2898  else if (strmatch(type, "WEIGHTEDDEGREEPOS3"))
2899  {
2900  if (m_nrhs<7)
2901  return NULL;
2902 
2903  char* dtype=get_str_from_str_or_direct(len);
2904  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2905  {
2906  int32_t size=get_int_from_int_or_str();
2907  int32_t order=get_int_from_int_or_str();
2908  int32_t max_mismatch=get_int_from_int_or_str();
2909  int32_t length=get_int_from_int_or_str();
2910  int32_t mkl_stepsize=get_int_from_int_or_str();
2911  int32_t* shifts=NULL;
2912  int32_t l=0;
2913  get_vector_from_int_vector_or_str(shifts, l);
2914  ASSERT(l==length)
2915 
2916  float64_t* position_weights=NULL;
2917  if (m_nrhs>9+length)
2918  {
2919  get_vector_from_real_vector_or_str(
2920  position_weights, length);
2921  }
2922 
2923  kernel=ui_kernel->create_weighteddegreepositionstring3(
2924  size, order, max_mismatch, shifts, length,
2925  mkl_stepsize, position_weights);
2926 
2927  SG_FREE(position_weights);
2928  SG_FREE(shifts);
2929  }
2930 
2931  SG_FREE(dtype);
2932  }
2933  else if (strmatch(type, "WEIGHTEDDEGREEPOS"))
2934  {
2935  if (m_nrhs<4)
2936  return NULL;
2937 
2938  char* dtype=get_str_from_str_or_direct(len);
2939  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2940  {
2941  int32_t size=get_int_from_int_or_str();
2942  int32_t order=3;
2943  int32_t max_mismatch=0;
2944  int32_t length=0;
2945  int32_t center=0;
2946  float64_t step=1;
2947 
2948  if (m_nrhs>4)
2949  {
2950  order=get_int_from_int_or_str();
2951 
2952  if (m_nrhs>5)
2953  {
2954  max_mismatch=get_int_from_int_or_str();
2955 
2956  if (m_nrhs>6)
2957  {
2958  length=get_int_from_int_or_str();
2959 
2960  if (m_nrhs>7)
2961  {
2962  center=get_int_from_int_or_str();
2963 
2964  if (m_nrhs>8)
2965  step=get_real_from_real_or_str();
2966  }
2967  }
2968  }
2969  }
2970 
2971  kernel=ui_kernel->create_weighteddegreepositionstring(
2972  size, order, max_mismatch, length, center, step);
2973  }
2974 
2975  SG_FREE(dtype);
2976  }
2977  else if (strmatch(type, "WEIGHTEDDEGREE"))
2978  {
2979  if (m_nrhs<4)
2980  return NULL;
2981 
2982  char* dtype=get_str_from_str_or_direct(len);
2983  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2984  {
2985  int32_t size=get_int_from_int_or_str();
2986  int32_t order=3;
2987  int32_t max_mismatch=0;
2988  bool use_normalization=true;
2989  int32_t mkl_stepsize=1;
2990  bool block_computation=true;
2991  int32_t single_degree=-1;
2992 
2993  if (m_nrhs>4)
2994  {
2995  order=get_int_from_int_or_str();
2996 
2997  if (m_nrhs>5)
2998  {
2999  max_mismatch=get_int_from_int_or_str();
3000 
3001  if (m_nrhs>6)
3002  {
3003  use_normalization=get_bool_from_bool_or_str();
3004 
3005  if (m_nrhs>7)
3006  {
3007  mkl_stepsize=get_int_from_int_or_str();
3008 
3009  if (m_nrhs>8)
3010  {
3011  block_computation=get_int_from_int_or_str();
3012 
3013  if (m_nrhs>9)
3014  single_degree=get_int_from_int_or_str();
3015  }
3016  }
3017  }
3018  }
3019  }
3020 
3021  kernel=ui_kernel->create_weighteddegreestring(
3022  size, order, max_mismatch, use_normalization,
3023  mkl_stepsize, block_computation, single_degree);
3024  }
3025 
3026  SG_FREE(dtype);
3027  }
3028  else if (strmatch(type, "WEIGHTEDDEGREERBF"))
3029  {
3030  if (m_nrhs<5)
3031  return NULL;
3032 
3033  char* dtype=get_str_from_str_or_direct(len);
3034  int32_t size=get_int_from_int_or_str();
3035  int32_t nof_properties=get_int_from_int_or_str();
3036  int32_t degree=1;
3037  float64_t width=1;
3038  if (m_nrhs>5)
3039  {
3040  degree=get_int_from_int_or_str();
3041  if (m_nrhs>6)
3042  {
3043  width=get_real_from_real_or_str();
3044  }
3045 
3046  }
3047  //if (strmatch(dtype, "REAL"))
3048 
3049  kernel=ui_kernel->create_weighteddegreerbf(size, degree, nof_properties, width);
3050 
3051  SG_FREE(dtype);
3052 
3053  }
3054  else if (strmatch(type, "SPECTRUMMISMATCHRBF"))
3055  {
3056  if (m_nrhs<7)
3057  return NULL;
3058 
3059  char* dtype=get_str_from_str_or_direct(len);
3060  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
3061  {
3062  int32_t size=get_int_from_int_or_str();
3063  int32_t degree=get_int_from_int_or_str();
3064  int32_t max_mismatch=get_int_from_int_or_str();
3065  float64_t width=get_real_from_real_or_str();
3066  float64_t* AA_matrix = NULL;
3067 
3068  //int32_t length=128*128;
3069  //get_vector_from_real_vector_or_str(AA_matrix, length);
3070  float64_t* helper_matrix=NULL;
3071  int32_t N=0;
3072  int32_t M=0;
3073  get_matrix(helper_matrix, N, M);
3074 
3075  if (N == 128 && M == 128)
3076  {
3077  AA_matrix=SG_MALLOC(float64_t, N*M);
3078  memcpy(AA_matrix, helper_matrix, N*M*sizeof(float64_t)) ;
3079  kernel=ui_kernel->create_spectrummismatchrbf(size, AA_matrix, 128, 128, max_mismatch, degree, width);
3080  }
3081  else
3082  {
3083  SG_ERROR("Matrix size %d %d\n", N, M)
3084  }
3085  }
3086  SG_FREE(dtype);
3087 
3088  }
3089 
3090  else if (strmatch(type, "SLIK") || strmatch(type, "LIK"))
3091  {
3092  if (m_nrhs<4)
3093  return NULL;
3094 
3095  char* dtype=get_str_from_str_or_direct(len);
3096  if (strmatch(dtype, "CHAR"))
3097  {
3098  int32_t size=get_int_from_int_or_str();
3099  int32_t length=3;
3100  int32_t inner_degree=3;
3101  int32_t outer_degree=1;
3102 
3103  if (m_nrhs>4)
3104  {
3105  length=get_int_from_int_or_str();
3106 
3107  if (m_nrhs>5)
3108  {
3109  inner_degree=get_int_from_int_or_str();
3110 
3111  if (m_nrhs>6)
3112  outer_degree=get_int_from_int_or_str();
3113  }
3114  }
3115 
3116  if (strmatch(type, "SLIK"))
3117  {
3118  kernel=ui_kernel->create_localityimprovedstring(
3119  size, length, inner_degree, outer_degree,
3121  }
3122  else
3123  {
3124  kernel=ui_kernel->create_localityimprovedstring(
3125  size, length, inner_degree, outer_degree,
3127  }
3128  }
3129 
3130  SG_FREE(dtype);
3131  }
3132  else if (strmatch(type, "POLY"))
3133  {
3134  if (m_nrhs<4)
3135  return NULL;
3136 
3137  char* dtype=get_str_from_str_or_direct(len);
3138  int32_t size=get_int_from_int_or_str();
3139  int32_t degree=2;
3140  bool inhomogene=false;
3141  bool normalize=true;
3142 
3143  if (m_nrhs>4)
3144  {
3145  degree=get_int_from_int_or_str();
3146 
3147  if (m_nrhs>5)
3148  {
3149  inhomogene=get_bool_from_bool_or_str();
3150 
3151  if (m_nrhs>6)
3152  normalize=get_bool_from_bool_or_str();
3153  }
3154  }
3155 
3156  if (strmatch(dtype, "REAL"))
3157  {
3158  kernel=ui_kernel->create_poly(
3159  size, degree, inhomogene, normalize);
3160  }
3161  else if (strmatch(dtype, "SPARSEREAL"))
3162  {
3163  kernel=ui_kernel->create_sparsepoly(
3164  size, degree, inhomogene, normalize);
3165  }
3166 
3167  SG_FREE(dtype);
3168  }
3169  else if (strmatch(type, "SIGMOID"))
3170  {
3171  if (m_nrhs<4)
3172  return NULL;
3173 
3174  char* dtype=get_str_from_str_or_direct(len);
3175  if (strmatch(dtype, "REAL"))
3176  {
3177  int32_t size=get_int_from_int_or_str();
3178  float64_t gamma=0.01;
3179  float64_t coef0=0;
3180 
3181  if (m_nrhs>4)
3182  {
3183  gamma=get_real_from_real_or_str();
3184 
3185  if (m_nrhs>5)
3186  coef0=get_real_from_real_or_str();
3187  }
3188 
3189  kernel=ui_kernel->create_sigmoid(size, gamma, coef0);
3190  }
3191 
3192  SG_FREE(dtype);
3193  }
3194  else if (strmatch(type, "GAUSSIAN")) // RBF
3195  {
3196  if (m_nrhs<4)
3197  return NULL;
3198 
3199  char* dtype=get_str_from_str_or_direct(len);
3200  int32_t size=get_int_from_int_or_str();
3201  float64_t width=1;
3202  if (m_nrhs>4)
3203  width=get_real_from_real_or_str();
3204 
3205  if (strmatch(dtype, "REAL"))
3206  kernel=ui_kernel->create_gaussian(size, width);
3207  else if (strmatch(dtype, "SPARSEREAL"))
3208  kernel=ui_kernel->create_sparsegaussian(size, width);
3209 
3210  SG_FREE(dtype);
3211  }
3212  else if (strmatch(type, "GAUSSIANSHIFT")) // RBF
3213  {
3214  if (m_nrhs<7)
3215  return NULL;
3216 
3217  char* dtype=get_str_from_str_or_direct(len);
3218  if (strmatch(dtype, "REAL"))
3219  {
3220  int32_t size=get_int_from_int_or_str();
3221  float64_t width=get_real_from_real_or_str();
3222  int32_t max_shift=get_int_from_int_or_str();
3223  int32_t shift_step=get_int_from_int_or_str();
3224 
3225  kernel=ui_kernel->create_gaussianshift(
3226  size, width, max_shift, shift_step);
3227  }
3228 
3229  SG_FREE(dtype);
3230  }
3231  else if (strmatch(type, "CUSTOM"))
3232  {
3233  if (m_nrhs!=4 || !create_return_values(0))
3234  return NULL;
3235 
3236  float64_t* kmatrix=NULL;
3237  int32_t num_feat=0;
3238  int32_t num_vec=0;
3239  get_matrix(kmatrix, num_feat, num_vec);
3240 
3241  int32_t tlen=0;
3242  char* ktype=get_string(tlen);
3243 
3244  if (!strmatch(ktype, "DIAG") &&
3245  !strmatch(ktype, "FULL") &&
3246  !strmatch(ktype, "FULL2DIAG"))
3247  {
3248  SG_FREE(ktype);
3249  SG_ERROR("Undefined type, not DIAG, FULL or FULL2DIAG.\n")
3250  }
3251 
3252  bool source_is_diag=false;
3253  bool dest_is_diag=false;
3254 
3255  if (strmatch(ktype, "FULL2DIAG"))
3256  dest_is_diag=true;
3257  else if (strmatch(ktype, "DIAG"))
3258  {
3259  source_is_diag=true;
3260  dest_is_diag=true;
3261  }
3262 
3263  kernel=ui_kernel->create_custom(kmatrix, num_feat, num_vec,
3264  source_is_diag, dest_is_diag);
3265  }
3266  else if (strmatch(type, "CONST"))
3267  {
3268  if (m_nrhs<4)
3269  return NULL;
3270 
3271  char* dtype=get_str_from_str_or_direct(len);
3272  if (strmatch(dtype, "REAL"))
3273  {
3274  int32_t size=get_int_from_int_or_str();
3275  float64_t c=1;
3276  if (m_nrhs>4)
3277  c=get_real_from_real_or_str();
3278 
3279  kernel=ui_kernel->create_const(size, c);
3280  }
3281 
3282  SG_FREE(dtype);
3283  }
3284  else if (strmatch(type, "DIAG"))
3285  {
3286  if (m_nrhs<4)
3287  return NULL;
3288 
3289  char* dtype=get_str_from_str_or_direct(len);
3290  if (strmatch(dtype, "REAL"))
3291  {
3292  int32_t size=get_int_from_int_or_str();
3293  float64_t diag=1;
3294  if (m_nrhs>4)
3295  diag=get_real_from_real_or_str();
3296 
3297  kernel=ui_kernel->create_diag(size, diag);
3298  }
3299 
3300  SG_FREE(dtype);
3301  }
3302 
3303  else if (strmatch(type, "TPPK"))
3304  {
3305  if (m_nrhs!=5)
3306  return NULL;
3307 
3308  char* dtype=get_str_from_str_or_direct(len);
3309  if (strmatch(dtype, "INT"))
3310  {
3311  int32_t size=get_int_from_int_or_str();
3312  float64_t* km=NULL;
3313  int32_t rows=0;
3314  int32_t cols=0;
3315  get_matrix(km, rows, cols);
3316  kernel=ui_kernel->create_tppk(size, km, rows, cols);
3317  }
3318 
3319  SG_FREE(dtype);
3320  }
3321  else
3323 
3324  SG_FREE(type);
3325  SG_DEBUG("created kernel: %p\n", kernel)
3326  return kernel;
3327 }
3328 
3329 
// Optionally wraps byte-string features in a derived feature object.
//
// When more than four right-hand-side arguments are present (m_nrhs>4), the
// next string argument selects the wrapper: "WD" builds a CWDFeatures object,
// "WSPEC" builds a CImplicitWeightedSpecFeatures object. Any other string, or
// m_nrhs<=4, leaves the input untouched.
//
// @param orig_feat string features to wrap
// @return orig_feat itself, or the newly created wrapper object
3330 CFeatures* CSGInterface::create_custom_string_features(CStringFeatures<uint8_t>* orig_feat)
3331 {
3332  CFeatures* feat=orig_feat;
3333 
3334  if (m_nrhs>4)
3335  {
3336  int32_t start=-1;
3337  int32_t order=0;
3338  int32_t from_order=0;
3339  bool normalize=true;
3340 
3341  int32_t feature_class_len=0;
3342  char* feature_class_str=get_string(feature_class_len);
3343  ASSERT(feature_class_str)
      // alphabet is created in both branches below but, in the visible code,
      // only ever handed to SG_UNREF at the end of this block.
3344  CAlphabet* alphabet=NULL;
3345  if (strmatch(feature_class_str, "WD"))
3346  {
3347  if (m_nrhs!=7)
3348  SG_ERROR("Please specify alphabet, WD, order, from_order\n")
3349 
3350  alphabet=new CAlphabet(RAWDNA);
      // remaining arguments are read in this order: order, from_order
3351  order=get_int();
3352  from_order=get_int();
3353  feat = new CWDFeatures((CStringFeatures<uint8_t>*) feat, order, from_order);
3354  }
3355  else if (strmatch(feature_class_str, "WSPEC"))
3356  {
3357  if (m_nrhs!=8)
3358  SG_ERROR("Please specify alphabet, order, WSPEC, start, normalize\n")
3359 
3360  alphabet=new CAlphabet(RAWDNA);
      // remaining arguments are read in this order: order, start, normalize
3361  order=get_int();
3362  start=get_int();
3363  normalize=get_bool();
      // NOTE(review): `sf` is used below but its declaration is not visible in
      // this listing (the embedded numbering skips 3364) -- restore it from the
      // upstream file before building.
3365  sf->obtain_from_char_features((CStringFeatures<uint8_t>*) feat, start, order, 0, normalize);
3366  sf->add_preprocessor(new CSortWordString());
3367  sf->apply_preprocessor();
      // the original char features are released; `feat` is re-pointed to the
      // wrapper built around `sf`
3368  SG_UNREF(feat);
3369  feat = new CImplicitWeightedSpecFeatures(sf, normalize);
3370  }
3371  SG_FREE(feature_class_str);
3372 
3373  SG_UNREF(alphabet);
3374  }
3375 
3376  return feat;
3377 }
3378 
3379 CFeatures* CSGInterface::create_custom_real_features(CDenseFeatures<float64_t>* orig_feat)
3380 {
3381  CFeatures* feat=orig_feat;
3382 
3383  if (m_nrhs==6)
3384  {
3385  int32_t degree=0;
3386  int32_t feature_class_len=0;
3387  bool normalize;
3388  char* feature_class_str=get_string(feature_class_len);
3389  ASSERT(feature_class_str)
3390  if (strmatch(feature_class_str, "POLY"))
3391  {
3392  //if (m_nrhs!=7)
3393  // SG_ERROR("Please specify POLY, degree\n")
3394 
3395  degree=get_int();
3396  normalize = get_bool();
3397  feat = new CPolyFeatures((CDenseFeatures<float64_t>*) feat, degree, normalize);
3398 
3399  }
3400  else
3401  SG_ERROR("Unknown feature class: %s\n", feature_class_str)
3402 
3403  SG_FREE(feature_class_str);
3404  }
3405 
3406  return feat;
3407 }
3408 
// Command handler for "init_kernel". The visible body performs no work and
// always reports success.
// NOTE(review): the embedded listing numbers skip 3411, so one line of the
// body is not visible in this extract -- confirm against the upstream file.
3409 bool CSGInterface::cmd_init_kernel()
3410 {
3412  return true;
3413 }
3414 
3415 bool CSGInterface::cmd_clean_kernel()
3416 {
3417  if (m_nrhs<1 || !create_return_values(0))
3418  return false;
3419 
3420  return ui_kernel->clean_kernel();
3421 }
3422 
3423 bool CSGInterface::cmd_save_kernel()
3424 {
3425  if (m_nrhs<2 || !create_return_values(0))
3426  return false;
3427 
3428  int32_t len=0;
3429  char* filename=get_str_from_str_or_direct(len);
3430 
3431  bool success=ui_kernel->save_kernel(filename);
3432 
3433  SG_FREE(filename);
3434  return success;
3435 }
3436 
// Command handler: initialise the kernel and return its kernel matrix.
//
// Accepts at most two arguments; when a second argument is present it is read
// as a string and forwarded to ui_kernel->init_kernel() (otherwise NULL is
// forwarded). The matrix is only written back when init_kernel() succeeded.
//
// @return false on a bad argument count / failed create_return_values(1)
//         check, otherwise the result of ui_kernel->init_kernel()
3437 bool CSGInterface::cmd_get_kernel_matrix()
3438 {
3439  if (m_nrhs>2 || !create_return_values(1))
3440  return false;
3441 
3442  int32_t len=0;
3443  char* target=NULL;
3444 
3445  if (m_nrhs==2)
3446  target=get_string(len);
3447  bool success=ui_kernel->init_kernel(target);
3448 
3449  if (success)
3450  {
3451  CKernel* kernel=ui_kernel->get_kernel();
3452  if (!kernel || !kernel->has_features())
3453  SG_ERROR("No kernel defined or not initialized.\n")
3454 
      // NOTE(review): `km` is used below but its declaration is not visible in
      // this listing (the embedded numbering skips 3455); presumably it holds
      // the kernel matrix obtained from `kernel` -- restore from upstream.
3456  set_matrix(km.matrix, km.num_rows, km.num_cols);
3457  }
3458 
3459  SG_FREE(target);
3460 
3461  return success;
3462 }
3463 
3464 bool CSGInterface::cmd_set_WD_position_weights()
3465 {
3466  if (m_nrhs<2 || m_nrhs>3 || !create_return_values(0))
3467  return false;
3468 
3469  CKernel* kernel=ui_kernel->get_kernel();
3470  if (!kernel)
3471  SG_ERROR("No kernel.\n")
3472 
3473  if (kernel->get_kernel_type()==K_COMBINED)
3474  {
3475  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3476  if (!kernel)
3477  SG_ERROR("No last kernel.\n")
3478 
3479  EKernelType ktype=kernel->get_kernel_type();
3480  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3481  SG_ERROR("Unsupported kernel.\n")
3482  }
3483 
3484  bool success=false;
3485  float64_t* weights=NULL;
3486  int32_t dim=0;
3487  int32_t len=0;
3488  get_matrix(weights, dim, len);
3489 
3490  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3491  {
3493  (CWeightedDegreeStringKernel*) kernel;
3494 
3495  if (dim!=1 && len>0)
3496  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n")
3497 
3498  ui_kernel->init_kernel("TRAIN");
3499  success=k->set_position_weights(weights, len);
3500  }
3501  else
3502  {
3505  char* target=NULL;
3506  bool is_train=true;
3507 
3508  if (m_nrhs==3)
3509  {
3510  int32_t tlen=0;
3511  target=get_string(tlen);
3512  if (!target)
3513  {
3514  SG_FREE(weights);
3515  SG_ERROR("Couldn't find second argument to method.\n")
3516  }
3517 
3518  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
3519  {
3520  SG_FREE(target);
3521  SG_ERROR("Second argument none of TRAIN or TEST.\n")
3522  }
3523 
3524  if (strmatch(target, "TEST"))
3525  is_train=false;
3526  }
3527 
3528  if (dim!=1 && len>0)
3529  {
3530  SG_FREE(target);
3531  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n")
3532  }
3533 
3534  if (dim==0 && len==0)
3535  {
3536  if (create_return_values(3))
3537  {
3538  if (is_train)
3539  success=k->delete_position_weights_lhs();
3540  else
3541  success=k->delete_position_weights_rhs();
3542  }
3543  else
3544  success=k->delete_position_weights();
3545  }
3546  else
3547  {
3548  if (create_return_values(3))
3549  {
3550  if (is_train)
3551  success=k->set_position_weights_lhs(weights, dim, len);
3552  else
3553  success=k->set_position_weights_rhs(weights, dim, len);
3554  }
3555  else
3556  {
3557  ui_kernel->init_kernel("TRAIN");
3558  k->set_position_weights(SGVector<float64_t>(weights, len));
3559  success=true;
3560  }
3561  }
3562 
3563  SG_FREE(target);
3564  }
3565 
3566  return success;
3567 }
3568 
3569 bool CSGInterface::cmd_get_subkernel_weights()
3570 {
3571  if (m_nrhs!=1 || !create_return_values(1))
3572  return false;
3573 
3574  CKernel *kernel=ui_kernel->get_kernel();
3575  if (!kernel)
3576  SG_ERROR("Invalid kernel.\n")
3577 
3578  EKernelType ktype=kernel->get_kernel_type();
3579  const float64_t* weights=NULL;
3580 
3581  if (ktype==K_COMBINED)
3582  {
3583  int32_t num_weights=-1;
3584  weights=((CCombinedKernel *) kernel)->get_subkernel_weights(num_weights);
3585 
3586  // matrices of shape 1 x num_weight are returned
3587  set_matrix(weights, 1, num_weights);
3588  return true;
3589  }
3590 
3591  int32_t degree=-1;
3592  int32_t length=-1;
3593 
3594  if (ktype==K_WEIGHTEDDEGREE)
3595  {
3596  weights=((CWeightedDegreeStringKernel *) kernel)->
3597  get_degree_weights(degree, length);
3598  }
3599  else if (ktype==K_WEIGHTEDDEGREEPOS)
3600  {
3601  weights=((CWeightedDegreePositionStringKernel *) kernel)->
3602  get_degree_weights(degree, length);
3603  }
3604  else
3605  SG_ERROR("Setting subkernel weights not supported on this kernel.\n")
3606 
3607  if (length==0)
3608  length=1;
3609 
3610  set_matrix(weights, degree, length);
3611  return true;
3612 }
3613 
3614 bool CSGInterface::cmd_set_subkernel_weights()
3615 {
3616  if (m_nrhs!=2 || !create_return_values(0))
3617  return false;
3618 
3619  CKernel* kernel=ui_kernel->get_kernel();
3620  if (!kernel)
3621  SG_ERROR("No kernel.\n")
3622 
3623  bool success=false;
3624  float64_t* weights=NULL;
3625  int32_t dim=0;
3626  int32_t len=0;
3627  get_matrix(weights, dim, len);
3628 
3629  EKernelType ktype=kernel->get_kernel_type();
3630  if (ktype==K_WEIGHTEDDEGREE)
3631  {
3633  (CWeightedDegreeStringKernel*) kernel;
3634  int32_t degree=k->get_degree();
3635  if (dim!=degree || len<1)
3636  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)
3637 
3638  if (len==1)
3639  len=0;
3640 
3641  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3642  }
3643  else if (ktype==K_WEIGHTEDDEGREEPOS)
3644  {
3647  int32_t degree=k->get_degree();
3648  if (dim!=degree || len<1)
3649  SG_ERROR("WDPos: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)
3650 
3651  if (len==1)
3652  len=0;
3653 
3654  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3655  }
3656  else // all other kernels
3657  {
3658  int32_t num_subkernels=kernel->get_num_subkernels();
3659  if (dim!=1 || len!=num_subkernels)
3660  SG_ERROR("All: Dimension mismatch (should be 1 x num_subkernels)\n")
3661 
3662  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3663  success=true;
3664  }
3665 
3666  return success;
3667 }
3668 
3669 bool CSGInterface::cmd_set_subkernel_weights_combined()
3670 {
3671  if (m_nrhs!=3 || !create_return_values(0))
3672  return false;
3673 
3674  CKernel* kernel=ui_kernel->get_kernel();
3675  if (!kernel)
3676  SG_ERROR("No kernel.\n")
3677  if (kernel->get_kernel_type()!=K_COMBINED)
3678  SG_ERROR("Only works for combined kernels.\n")
3679 
3680  bool success=false;
3681  float64_t* weights=NULL;
3682  int32_t dim=0;
3683  int32_t len=0;
3684  get_matrix(weights, dim, len);
3685 
3686  int32_t idx=get_int();
3687  SG_DEBUG("using kernel_idx=%i\n", idx)
3688 
3689  kernel=((CCombinedKernel*) kernel)->get_kernel(idx);
3690  if (!kernel)
3691  SG_ERROR("No subkernel at idx %d.\n", idx)
3692 
3693  EKernelType ktype=kernel->get_kernel_type();
3694  if (ktype==K_WEIGHTEDDEGREE)
3695  {
3697  (CWeightedDegreeStringKernel*) kernel;
3698  int32_t degree=k->get_degree();
3699  if (dim!=degree || len<1)
3700  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3701 
3702  if (len==1)
3703  len=0;
3704 
3705  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3706  }
3707  else if (ktype==K_WEIGHTEDDEGREEPOS)
3708  {
3711  int32_t degree=k->get_degree();
3712  if (dim!=degree || len<1)
3713  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3714 
3715  if (len==1)
3716  len=0;
3717 
3718  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3719  }
3720  else // all other kernels
3721  {
3722  int32_t num_subkernels=kernel->get_num_subkernels();
3723  if (dim!=1 || len!=num_subkernels)
3724  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n")
3725 
3726  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3727  success=true;
3728  }
3729 
3730  return success;
3731 }
3732 
3733 bool CSGInterface::cmd_get_dotfeature_weights_combined()
3734 {
3735  if (m_nrhs!=2 || !create_return_values(1))
3736  return false;
3737 
3738  int32_t tlen=0;
3739  char* target=get_string(tlen);
3740  CFeatures* features=NULL;
3741 
3742  if (strmatch(target, "TRAIN"))
3743  features=ui_features->get_train_features();
3744  else if (strmatch(target, "TEST"))
3745  features=ui_features->get_test_features();
3746  else
3747  {
3748  SG_FREE(target);
3749  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
3750  }
3751  SG_FREE(target);
3752 
3753  if (!features)
3754  SG_ERROR("No features.\n")
3755  if (features->get_feature_class()!=C_COMBINED_DOT)
3756  SG_ERROR("Only works for combined dot features.\n")
3757 
3758  SGVector<float64_t> weights = ((CCombinedDotFeatures*) features)->get_subfeature_weights();
3759  set_vector(weights.vector, weights.vlen);
3760 
3761  return true;
3762 }
3763 
3764 bool CSGInterface::cmd_set_dotfeature_weights_combined()
3765 {
3766  if (m_nrhs!=3 || !create_return_values(0))
3767  return false;
3768 
3769  int32_t tlen=0;
3770  char* target=get_string(tlen);
3771  CFeatures* features=NULL;
3772 
3773  if (strmatch(target, "TRAIN"))
3774  features=ui_features->get_train_features();
3775  else if (strmatch(target, "TEST"))
3776  features=ui_features->get_test_features();
3777  else
3778  {
3779  SG_FREE(target);
3780  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
3781  }
3782  SG_FREE(target);
3783 
3784  if (!features)
3785  SG_ERROR("No features.\n")
3786  if (features->get_feature_class()!=C_COMBINED_DOT)
3787  SG_ERROR("Only works for combined dot features.\n")
3788 
3789  float64_t* weights=NULL;
3790  int32_t dim=0;
3791  int32_t len=0;
3792  get_matrix(weights, dim, len);
3793 
3794  ((CCombinedDotFeatures*) features)->set_subfeature_weights(SGVector<float64_t>(weights, len));
3795 
3796  return true;
3797 }
3798 
3799 bool CSGInterface::cmd_set_last_subkernel_weights()
3800 {
3801  if (m_nrhs!=2 || !create_return_values(0))
3802  return false;
3803 
3804  CKernel* kernel=ui_kernel->get_kernel();
3805  if (!kernel)
3806  SG_ERROR("No kernel.\n")
3807  if (kernel->get_kernel_type()!=K_COMBINED)
3808  SG_ERROR("Only works for Combined kernels.\n")
3809 
3810  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3811  if (!kernel)
3812  SG_ERROR("No last kernel.\n")
3813 
3814  bool success=false;
3815  float64_t* weights=NULL;
3816  int32_t dim=0;
3817  int32_t len=0;
3818  get_matrix(weights, dim, len);
3819 
3820  EKernelType ktype=kernel->get_kernel_type();
3821  if (ktype==K_WEIGHTEDDEGREE)
3822  {
3824  if (dim!=k->get_degree() || len<1)
3825  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3826 
3827  if (len==1)
3828  len=0;
3829 
3830  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3831  }
3832  else if (ktype==K_WEIGHTEDDEGREEPOS)
3833  {
3836  if (dim!=k->get_degree() || len<1)
3837  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3838 
3839  if (len==1)
3840  len=0;
3841 
3842  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3843  }
3844  else // all other kernels
3845  {
3846  int32_t num_subkernels=kernel->get_num_subkernels();
3847  if (dim!=1 || len!=num_subkernels)
3848  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n")
3849 
3850  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3851  success=true;
3852  }
3853 
3854  return success;
3855 }
3856 
3857 bool CSGInterface::cmd_get_WD_position_weights()
3858 {
3859  if (m_nrhs!=1 || !create_return_values(1))
3860  return false;
3861 
3862  CKernel* kernel=ui_kernel->get_kernel();
3863  if (!kernel)
3864  SG_ERROR("No kernel.\n")
3865 
3866  if (kernel->get_kernel_type()==K_COMBINED)
3867  {
3868  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3869  if (!kernel)
3870  SG_ERROR("Couldn't find last kernel.\n")
3871 
3872  EKernelType ktype=kernel->get_kernel_type();
3873  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3874  SG_ERROR("Wrong subkernel type.\n")
3875  }
3876 
3877  int32_t len=0;
3878  const float64_t* position_weights;
3879 
3880  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3881  position_weights=((CWeightedDegreeStringKernel*) kernel)->get_position_weights(len);
3882  else
3883  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->get_position_weights(len);
3884 
3885  if (position_weights==NULL)
3886  set_vector(position_weights, 0);
3887  else
3888  set_vector(position_weights, len);
3889 
3890  return true;
3891 }
3892 
3893 bool CSGInterface::cmd_get_last_subkernel_weights()
3894 {
3895  if (m_nrhs!=1 || !create_return_values(1))
3896  return false;
3897 
3898  CKernel* kernel=ui_kernel->get_kernel();
3899  EKernelType ktype=kernel->get_kernel_type();
3900  if (!kernel)
3901  SG_ERROR("No kernel.\n")
3902  if (ktype!=K_COMBINED)
3903  SG_ERROR("Only works for Combined kernels.\n")
3904 
3905  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3906  if (!kernel)
3907  SG_ERROR("Couldn't find last kernel.\n")
3908 
3909  int32_t degree=0;
3910  int32_t len=0;
3911 
3912  if (ktype==K_COMBINED)
3913  {
3914  int32_t num_weights=0;
3915  const float64_t* weights=
3916  ((CCombinedKernel*) kernel)->get_subkernel_weights(num_weights);
3917 
3918  set_vector(weights, num_weights);
3919  return true;
3920  }
3921 
3922  float64_t* weights=NULL;
3923  if (ktype==K_WEIGHTEDDEGREE)
3924  weights=((CWeightedDegreeStringKernel*) kernel)->
3925  get_degree_weights(degree, len);
3926  else if (ktype==K_WEIGHTEDDEGREEPOS)
3927  weights=((CWeightedDegreePositionStringKernel*) kernel)->
3928  get_degree_weights(degree, len);
3929  else
3930  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n")
3931 
3932  if (len==0)
3933  len=1;
3934 
3935  set_matrix(weights, degree, len);
3936 
3937  return true;
3938 }
3939 
3940 bool CSGInterface::cmd_compute_by_subkernels()
3941 {
3942  if (m_nrhs!=1 || !create_return_values(1))
3943  return false;
3944 
3945  CKernel* kernel=ui_kernel->get_kernel();
3946  if (!kernel)
3947  SG_ERROR("No kernel.\n")
3948  if (!kernel->get_rhs())
3949  SG_ERROR("No rhs.\n")
3950 
3951  int32_t num_vec=kernel->get_rhs()->get_num_vectors();
3952  int32_t degree=0;
3953  int32_t len=0;
3954  EKernelType ktype=kernel->get_kernel_type();
3955 
3956  // it would be nice to have a common base class for the WD kernels
3957  if (ktype==K_WEIGHTEDDEGREE)
3958  {
3960  k->get_degree_weights(degree, len);
3961  if (!k->is_tree_initialized())
3962  SG_ERROR("Kernel optimization not initialized.\n")
3963  }
3964  else if (ktype==K_WEIGHTEDDEGREEPOS)
3965  {
3968  k->get_degree_weights(degree, len);
3969  if (!k->is_tree_initialized())
3970  SG_ERROR("Kernel optimization not initialized.\n")
3971  }
3972  else
3973  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n")
3974 
3975  if (len==0)
3976  len=1;
3977 
3978  int32_t num_feat=degree*len;
3979  int32_t num=num_feat*num_vec;
3980  float64_t* result=SG_MALLOC(float64_t, num);
3981 
3982  for (int32_t i=0; i<num; i++)
3983  result[i]=0;
3984 
3985  if (ktype==K_WEIGHTEDDEGREE)
3986  {
3988  for (int32_t i=0; i<num_vec; i++)
3989  k->compute_by_tree(i, &result[i*num_feat]);
3990  }
3991  else
3992  {
3995  for (int32_t i=0; i<num_vec; i++)
3996  k->compute_by_tree(i, &result[i*num_feat]);
3997  }
3998 
3999  set_matrix(result, num_feat, num_vec);
4000  SG_FREE(result);
4001 
4002  return true;
4003 }
4004 
4005 bool CSGInterface::cmd_init_kernel_optimization()
4006 {
4007  if (m_nrhs<1 || !create_return_values(0))
4008  return false;
4009 
4010  return ui_kernel->init_kernel_optimization();
4011 }
4012 
4013 bool CSGInterface::cmd_get_kernel_optimization()
4014 {
4015  if (m_nrhs<1 || !create_return_values(1))
4016  return false;
4017 
4018  CKernel* kernel=ui_kernel->get_kernel();
4019  if (!kernel)
4020  SG_ERROR("No kernel defined.\n")
4021 
4022  switch (kernel->get_kernel_type())
4023  {
4024  case K_WEIGHTEDDEGREEPOS:
4025  {
4026  if (m_nrhs!=2)
4027  SG_ERROR("parameter missing\n")
4028 
4029  int32_t max_order=get_int();
4030  if ((max_order<1) || (max_order>12))
4031  {
4032  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order)
4033  max_order=1;
4034  }
4035 
4037  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4038  if (!svm)
4039  SG_ERROR("No SVM defined.\n")
4040 
4041  int32_t num_suppvec=svm->get_num_support_vectors();
4042  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4043  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4044  int32_t num_feat=0;
4045  int32_t num_sym=0;
4046 
4047  for (int32_t i=0; i<num_suppvec; i++)
4048  {
4049  sv_idx[i]=svm->get_support_vector(i);
4050  sv_weight[i]=svm->get_alpha(i);
4051  }
4052 
4053  float64_t* position_weights=k->extract_w(max_order, num_feat,
4054  num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4055  SG_FREE(sv_idx);
4056  SG_FREE(sv_weight);
4057 
4058  set_matrix(position_weights, num_sym, num_feat);
4059  SG_FREE(position_weights);
4060 
4061  return true;
4062  }
4063 
4064  case K_COMMWORDSTRING:
4066  {
4068  int32_t len=0;
4069  float64_t* weights;
4070  k->get_dictionary(len, weights);
4071 
4072  set_vector(weights, len);
4073  return true;
4074  }
4075  case K_LINEAR:
4076  {
4077  CLinearKernel* k=(CLinearKernel*) kernel;
4078  SGVector<float64_t> weights=k->get_w();
4079 
4080  set_vector(weights.vector, weights.size());
4081  return true;
4082  }
4083  default:
4084  SG_ERROR("Unsupported kernel %s.\n", kernel->get_name())
4085  }
4086 
4087  return true;
4088 }
4089 
4090 bool CSGInterface::cmd_delete_kernel_optimization()
4091 {
4092  if (m_nrhs<1 || !create_return_values(0))
4093  return false;
4094 
4095  return ui_kernel->delete_kernel_optimization();
4096 }
4097 
4098 bool CSGInterface::cmd_use_diagonal_speedup()
4099 {
4100  if (m_nrhs<2 || !create_return_values(0))
4101  return false;
4102 
4103  bool speedup=get_bool();
4104 
4105  CKernel* kernel=ui_kernel->get_kernel();
4106  if (!kernel)
4107  SG_ERROR("No kernel defined.\n")
4108 
4109  if (kernel->get_kernel_type()==K_COMBINED)
4110  {
4111  SG_DEBUG("Identified combined kernel.\n")
4112  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
4113  if (!kernel)
4114  SG_ERROR("No last kernel defined.\n")
4115  }
4116 
4117  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4118  SG_ERROR("Currently only commwordstring kernel supports diagonal speedup\n")
4119 
4120  ((CCommWordStringKernel*) kernel)->set_use_dict_diagonal_optimization(speedup);
4121 
4122  SG_INFO("Diagonal speedup %s.\n", speedup ? "enabled" : "disabled")
4123 
4124  return true;
4125 }
4126 
4127 bool CSGInterface::cmd_set_kernel_optimization_type()
4128 {
4129  if (m_nrhs<2 || !create_return_values(0))
4130  return false;
4131 
4132  int32_t len=0;
4133  char* opt_type=get_str_from_str_or_direct(len);
4134 
4135  bool success=ui_kernel->set_optimization_type(opt_type);
4136 
4137  SG_FREE(opt_type);
4138  return success;
4139 }
4140 
4141 bool CSGInterface::cmd_set_solver()
4142 {
4143  if (m_nrhs<2 || !create_return_values(0))
4144  return false;
4145 
4146  int32_t len=0;
4147  char* solver=get_str_from_str_or_direct(len);
4148 
4149  bool success=ui_classifier->set_solver(solver);
4150 
4151  SG_FREE(solver);
4152  return success;
4153 }
4154 
4155 bool CSGInterface::cmd_set_constraint_generator()
4156 {
4157  if (m_nrhs<2 || !create_return_values(0))
4158  return false;
4159 
4160  int32_t len=0;
4161  char* cg=get_str_from_str_or_direct(len);
4162 
4163  bool success=ui_classifier->set_constraint_generator(cg);
4164 
4165  SG_FREE(cg);
4166  return success;
4167 }
4168 
4169 bool CSGInterface::cmd_set_prior_probs()
4170 {
4171  if (m_nrhs<3 || !create_return_values(0))
4172  return false;
4173 
4174  CSalzbergWordStringKernel* kernel=
4175  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4176  if (kernel->get_kernel_type()!=K_SALZBERG)
4177  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n")
4178 
4179  float64_t pos_probs=get_real_from_real_or_str();
4180  float64_t neg_probs=get_real_from_real_or_str();
4181 
4182  kernel->set_prior_probs(pos_probs, neg_probs);
4183 
4184  return true;
4185 }
4186 
4187 bool CSGInterface::cmd_set_prior_probs_from_labels()
4188 {
4189  if (m_nrhs<2 || !create_return_values(0))
4190  return false;
4191 
4192  CSalzbergWordStringKernel* kernel=
4193  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4194  if (kernel->get_kernel_type()!=K_SALZBERG)
4195  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n")
4196 
4197  float64_t* lab=NULL;
4198  int32_t len=0;
4199  get_vector(lab, len);
4200 
4201  CBinaryLabels* labels=new CBinaryLabels(len);
4202  for (int32_t i=0; i<len; i++)
4203  {
4204  if (!labels->set_label(i, lab[i]))
4205  SG_ERROR("Couldn't set label %d (of %d): %f.\n", i, len, lab[i])
4206  }
4207  SG_FREE(lab);
4208 
4209  kernel->set_prior_probs_from_labels(labels);
4210 
4211  SG_UNREF(labels);
4212  return true;
4213 }
4214 
4215 #ifdef USE_SVMLIGHT
4216 bool CSGInterface::cmd_resize_kernel_cache()
4217 {
4218  if (m_nrhs<2 || !create_return_values(0))
4219  return false;
4220 
4221  int32_t size=get_int_from_int_or_str();
4222  return ui_kernel->resize_kernel_cache(size);
4223 }
4224 #endif //USE_SVMLIGHT
4225 
4226 
4229 bool CSGInterface::cmd_set_distance()
4230 {
4231  if (m_nrhs<3 || !create_return_values(0))
4232  return false;
4233 
4234  CDistance* distance=NULL;
4235  int32_t len=0;
4236  char* type=get_str_from_str_or_direct(len);
4237  char* dtype=get_str_from_str_or_direct(len);
4238 
4239  if (strmatch(type, "MINKOWSKI") && m_nrhs==4)
4240  {
4241  float64_t k=get_real_from_real_or_str();
4242  distance=ui_distance->create_minkowski(k);
4243  }
4244  else if (strmatch(type, "MANHATTAN"))
4245  {
4246  if (strmatch(dtype, "REAL"))
4247  distance=ui_distance->create_generic(D_MANHATTAN);
4248  else if (strmatch(dtype, "WORD"))
4249  distance=ui_distance->create_generic(D_MANHATTANWORD);
4250  }
4251  else if (strmatch(type, "HAMMING") && strmatch(dtype, "WORD"))
4252  {
4253  bool use_sign=false;
4254  if (m_nrhs==4)
4255  use_sign=get_bool_from_bool_or_str(); // optional
4256 
4257  distance=ui_distance->create_hammingword(use_sign);
4258  }
4259  else if (strmatch(type, "CANBERRA"))
4260  {
4261  if (strmatch(dtype, "REAL"))
4262  distance=ui_distance->create_generic(D_CANBERRA);
4263  else if (strmatch(dtype, "WORD"))
4264  distance=ui_distance->create_generic(D_CANBERRAWORD);
4265  }
4266  else if (strmatch(type, "CHEBYSHEW") && strmatch(dtype, "REAL"))
4267  {
4268  distance=ui_distance->create_generic(D_CHEBYSHEW);
4269  }
4270  else if (strmatch(type, "GEODESIC") && strmatch(dtype, "REAL"))
4271  {
4272  distance=ui_distance->create_generic(D_GEODESIC);
4273  }
4274  else if (strmatch(type, "JENSEN") && strmatch(dtype, "REAL"))
4275  {
4276  distance=ui_distance->create_generic(D_JENSEN);
4277  }
4278  else if (strmatch(type, "CHISQUARE") && strmatch(dtype, "REAL"))
4279  {
4280  distance=ui_distance->create_generic(D_CHISQUARE);
4281  }
4282  else if (strmatch(type, "TANIMOTO") && strmatch(dtype, "REAL"))
4283  {
4284  distance=ui_distance->create_generic(D_TANIMOTO);
4285  }
4286  else if (strmatch(type, "COSINE") && strmatch(dtype, "REAL"))
4287  {
4288  distance=ui_distance->create_generic(D_COSINE);
4289  }
4290  else if (strmatch(type, "BRAYCURTIS") && strmatch(dtype, "REAL"))
4291  {
4292  distance=ui_distance->create_generic(D_BRAYCURTIS);
4293  }
4294  else if (strmatch(type, "EUCLIDEAN"))
4295  {
4296  if (strmatch(dtype, "REAL"))
4297  distance=ui_distance->create_generic(D_EUCLIDEAN);
4298  else if (strmatch(dtype, "SPARSEREAL"))
4299  distance=ui_distance->create_generic(D_SPARSEEUCLIDEAN);
4300  }
4301  else
4303 
4304  SG_FREE(type);
4305  SG_FREE(dtype);
4306  return ui_distance->set_distance(distance);
4307 }
4308 
4309 bool CSGInterface::cmd_init_distance()
4310 {
4312  return true;
4313 }
4314 
4315 bool CSGInterface::cmd_get_distance_matrix()
4316 {
4317  if (m_nrhs!=2 || !create_return_values(1))
4318  return false;
4319 
4320  int32_t len=0;
4321  char* target=get_string(len);
4322 
4323  bool success=ui_distance->init_distance(target);
4324 
4325  if (success)
4326  {
4327  CDistance* distance=ui_distance->get_distance();
4328  if (!distance || !distance->has_features())
4329  SG_ERROR("No distance defined or not initialized.\n")
4330 
4331  SGMatrix<float64_t> dmatrix=distance->get_distance_matrix();
4332  set_matrix(dmatrix.matrix, dmatrix.num_rows, dmatrix.num_cols);
4333  }
4334 
4335  return success;
4336 }
4337 
4338 
4339 /* POIM */
4340 
4341 bool CSGInterface::cmd_get_SPEC_consensus()
4342 {
4343  if (m_nrhs!=1 || !create_return_values(1))
4344  return false;
4345 
4346  CKernel* kernel=ui_kernel->get_kernel();
4347  if (!kernel)
4348  SG_ERROR("No kernel.\n")
4349  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4350  SG_ERROR("Only works for CommWordString kernels.\n")
4351 
4352  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4353  ASSERT(svm)
4354  int32_t num_suppvec=svm->get_num_support_vectors();
4355  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4356  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4357  int32_t num_feat=0;
4358 
4359  for (int32_t i=0; i<num_suppvec; i++)
4360  {
4361  sv_idx[i]=svm->get_support_vector(i);
4362  sv_weight[i]=svm->get_alpha(i);
4363  }
4364 
4365  char* consensus=((CCommWordStringKernel*) kernel)->compute_consensus(
4366  num_feat, num_suppvec, sv_idx, sv_weight);
4367  SG_FREE(sv_idx);
4368  SG_FREE(sv_weight);
4369 
4370  set_vector(consensus, num_feat);
4371  SG_FREE(consensus);
4372 
4373  return true;
4374 }
4375 
4376 bool CSGInterface::cmd_get_SPEC_scoring()
4377 {
4378  if (m_nrhs!=2 || !create_return_values(1))
4379  return false;
4380 
4381  int32_t max_order=get_int();
4382  CKernel* kernel=ui_kernel->get_kernel();
4383  if (!kernel)
4384  SG_ERROR("No kernel.\n")
4385 
4386  EKernelType ktype=kernel->get_kernel_type();
4387  if (ktype!=K_COMMWORDSTRING && ktype!=K_WEIGHTEDCOMMWORDSTRING)
4388  SG_ERROR("Only works for (Weighted) CommWordString kernels.\n")
4389 
4390  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4391  ASSERT(svm)
4392  int32_t num_suppvec=svm->get_num_support_vectors();
4393  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4394  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4395  int32_t num_feat=0;
4396  int32_t num_sym=0;
4397 
4398  for (int32_t i=0; i<num_suppvec; i++)
4399  {
4400  sv_idx[i]=svm->get_support_vector(i);
4401  sv_weight[i]=svm->get_alpha(i);
4402  }
4403 
4404  if ((max_order<1) || (max_order>8))
4405  {
4406  SG_WARNING("max_order out of range 1..8 (%d). setting to 1\n", max_order)
4407  max_order=1;
4408  }
4409 
4410  float64_t* position_weights=NULL;
4411  if (ktype==K_COMMWORDSTRING)
4412  position_weights=((CCommWordStringKernel*) kernel)->compute_scoring(
4413  max_order, num_feat, num_sym, NULL,
4414  num_suppvec, sv_idx, sv_weight);
4415  else
4416  position_weights=((CWeightedCommWordStringKernel*) kernel)->compute_scoring(
4417  max_order, num_feat, num_sym, NULL,
4418  num_suppvec, sv_idx, sv_weight);
4419  SG_FREE(sv_idx);
4420  SG_FREE(sv_weight);
4421 
4422  set_matrix(position_weights, num_sym, num_feat);
4423  SG_FREE(position_weights);
4424 
4425  return true;
4426 }
4427 
4428 bool CSGInterface::cmd_get_WD_consensus()
4429 {
4430  if (m_nrhs!=1 || !create_return_values(1))
4431  return false;
4432 
4433  CKernel* kernel=ui_kernel->get_kernel();
4434  if (!kernel)
4435  SG_ERROR("No kernel.\n")
4436  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4437  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4438 
4439  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4440  ASSERT(svm)
4441  int32_t num_suppvec=svm->get_num_support_vectors();
4442  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4443  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4444  int32_t num_feat=0;
4445 
4446  for (int32_t i=0; i<num_suppvec; i++)
4447  {
4448  sv_idx[i]=svm->get_support_vector(i);
4449  sv_weight[i]=svm->get_alpha(i);
4450  }
4451 
4452  char* consensus=((CWeightedDegreePositionStringKernel*) kernel)->compute_consensus(
4453  num_feat, num_suppvec, sv_idx, sv_weight);
4454  SG_FREE(sv_idx);
4455  SG_FREE(sv_weight);
4456 
4457  set_vector(consensus, num_feat);
4458  SG_FREE(consensus);
4459 
4460  return true;
4461 }
4462 
4463 bool CSGInterface::cmd_compute_POIM_WD()
4464 {
4465  if (m_nrhs!=3 || !create_return_values(1))
4466  return false;
4467 
4468  int32_t max_order=get_int();
4469  float64_t* distribution=NULL;
4470  int32_t num_dfeat=0;
4471  int32_t num_dvec=0;
4472  get_matrix(distribution, num_dfeat, num_dvec);
4473 
4474  if (!distribution)
4475  SG_ERROR("Wrong distribution.\n")
4476 
4477  CKernel* kernel=ui_kernel->get_kernel();
4478  if (!kernel)
4479  SG_ERROR("No Kernel.\n")
4480  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4481  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4482 
4483  int32_t seqlen=0;
4484  int32_t num_sym=0;
4486  (((CWeightedDegreePositionStringKernel*) kernel)->get_lhs());
4487  ASSERT(sfeat)
4488  seqlen=sfeat->get_max_vector_length();
4489  num_sym=(int32_t) sfeat->get_num_symbols();
4490 
4491  if (num_dvec!=seqlen || num_dfeat!=num_sym)
4492  {
4493  SG_ERROR("distribution should have (seqlen x num_sym) elements"
4494  "(seqlen: %d vs. %d symbols: %d vs. %d)\n", seqlen,
4495  num_dvec, num_sym, num_dfeat);
4496  }
4497 
4498  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4499  ASSERT(svm)
4500  int32_t num_suppvec=svm->get_num_support_vectors();
4501  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4502  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4503 
4504  for (int32_t i=0; i<num_suppvec; i++)
4505  {
4506  sv_idx[i]=svm->get_support_vector(i);
4507  sv_weight[i]=svm->get_alpha(i);
4508  }
4509 
4510  /*
4511  if ((max_order < 1) || (max_order > 12))
4512  {
4513  SG_WARNING("max_order out of range 1..12 (%d). setting to 1.\n", max_order)
4514  max_order=1;
4515  }
4516  */
4517 
4518  float64_t* position_weights;
4519  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->compute_POIM(
4520  max_order, seqlen, num_sym, NULL,
4521  num_suppvec, sv_idx, sv_weight, distribution);
4522  SG_FREE(sv_idx);
4523  SG_FREE(sv_weight);
4524 
4525  set_matrix(position_weights, num_sym, seqlen);
4526  SG_FREE(position_weights);
4527 
4528  return true;
4529  }
4530 
4531  bool CSGInterface::cmd_get_WD_scoring()
4532  {
4533  if (m_nrhs!=2 || !create_return_values(1))
4534  return false;
4535 
4536  int32_t max_order=get_int();
4537 
4538  CKernel* kernel=ui_kernel->get_kernel();
4539  if (!kernel)
4540  SG_ERROR("No kernel.\n")
4541  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4542  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4543 
4544  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4545  ASSERT(svm)
4546  int32_t num_suppvec=svm->get_num_support_vectors();
4547  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4548  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4549  int32_t num_feat=0;
4550  int32_t num_sym=0;
4551 
4552  for (int32_t i=0; i<num_suppvec; i++)
4553  {
4554  sv_idx[i]=svm->get_support_vector(i);
4555  sv_weight[i]=svm->get_alpha(i);
4556  }
4557 
4558  if ((max_order<1) || (max_order>12))
4559  {
4560  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order)
4561  max_order=1;
4562  }
4563 
4564  float64_t* position_weights=
4565  ((CWeightedDegreePositionStringKernel*) kernel)->compute_scoring(
4566  max_order, num_feat, num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4567  SG_FREE(sv_idx);
4568  SG_FREE(sv_weight);
4569 
4570  set_matrix(position_weights, num_sym, num_feat);
4571  SG_FREE(position_weights);
4572 
4573  return true;
4574 }
4575 
4576 
4577 /* Classifier */
4578 
4579 bool CSGInterface::cmd_classify()
4580 {
4581  if (m_nrhs!=1 || !create_return_values(1))
4582  return false;
4583 
4584  if (!ui_kernel->get_kernel() ||
4585  ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
4586  {
4587  CFeatures* feat=ui_features->get_test_features();
4588  if (!feat)
4589  SG_ERROR("No features found.\n")
4590  }
4591 
4592  CLabels* labels=ui_classifier->classify();
4593  if (!labels)
4594  SG_ERROR("Classify failed\n")
4595 
4596  int32_t num_vec=labels->get_num_labels();
4597  float64_t* result=SG_MALLOC(float64_t, num_vec);
4598  for (int32_t i=0; i<num_vec; i++)
4599  {
4600  float64_t value = 0;
4601  switch (labels->get_label_type())
4602  {
4603  case LT_REGRESSION:
4604  value = ((CRegressionLabels*) labels)->get_label(i);
4605  break;
4606  case LT_BINARY:
4607  value = ((CBinaryLabels*) labels)->get_value(i);
4608  break;
4609  case LT_MULTICLASS:
4610  value = ((CMulticlassLabels*) labels)->get_label(i);
4611  break;
4612  default:
4614  break;
4615  }
4616  result[i]=value;
4617  }
4618  SG_UNREF(labels);
4619 
4620  set_vector(result, num_vec);
4621  SG_FREE(result);
4622 
4623  return true;
4624 }
4625 
4626 bool CSGInterface::cmd_classify_example()
4627 {
4628  if (m_nrhs!=2 || !create_return_values(1))
4629  return false;
4630 
4631  int32_t idx=get_int();
4632  float64_t result=0;
4633 
4634  if (!ui_classifier->classify_example(idx, result))
4635  SG_ERROR("Classify_example failed.\n")
4636 
4637  set_real(result);
4638 
4639  return true;
4640 }
4641 
4642 bool CSGInterface::cmd_get_classifier()
4643 {
4644  if (m_nrhs<1 || m_nrhs>2 || !create_return_values(2))
4645  return false;
4646 
4647  int32_t idx=-1;
4648  if (m_nrhs==2)
4649  idx=get_int();
4650 
4651  float64_t* bias=NULL;
4652  float64_t* weights=NULL;
4653  int32_t rows=0;
4654  int32_t cols=0;
4655  int32_t brows=0;
4656  int32_t bcols=0;
4657 
4658  if (!ui_classifier->get_trained_classifier(
4659  weights, rows, cols, bias, brows, bcols, idx))
4660  return false;
4661 
4662  //SG_PRINT("brows %d, bcols %d\n", brows, bcols)
4663  //CMath::display_matrix(bias, brows, bcols);
4664  set_matrix(bias, brows, bcols);
4665  SG_FREE(bias);
4666 
4667  //SG_PRINT("rows %d, cols %d\n", rows, cols)
4668  //CMath::display_matrix(weights, rows, cols);
4669  set_matrix(weights, rows, cols);
4670  SG_FREE(weights);
4671 
4672  return true;
4673 }
4674 
4675 bool CSGInterface::cmd_new_classifier()
4676 {
4677  if (m_nrhs<2 || !create_return_values(0))
4678  return false;
4679 
4680  int32_t len=0;
4681  char* name=get_str_from_str_or_direct(len);
4682  int32_t d=6;
4683  int32_t from_d=40;
4684 
4685  if (m_nrhs>2)
4686  {
4687  d=get_int_from_int_or_str();
4688 
4689  if (m_nrhs>3)
4690  from_d=get_int_from_int_or_str();
4691  }
4692 
4693  bool success=ui_classifier->new_classifier(name, d, from_d);
4694 
4695  SG_FREE(name);
4696  return success;
4697 }
4698 
4699 bool CSGInterface::cmd_save_classifier()
4700 {
4701  if (m_nrhs<2 || !create_return_values(0))
4702  return false;
4703 
4704  int32_t len=0;
4705  char* filename=get_str_from_str_or_direct(len);
4706 
4707  bool success=ui_classifier->save(filename);
4708 
4709  SG_FREE(filename);
4710  return success;
4711 }
4712 
4713 bool CSGInterface::cmd_load_classifier()
4714 {
4715  if (m_nrhs<3 || !create_return_values(0))
4716  return false;
4717 
4718  int32_t len=0;
4719  char* filename=get_str_from_str_or_direct(len);
4720  char* type=get_str_from_str_or_direct(len);
4721 
4722  bool success=ui_classifier->load(filename, type);
4723 
4724  if (dynamic_cast<CKernelMachine*>(ui_classifier->get_classifier()))
4725  {
4726  CKernelMachine* kernel_machine = dynamic_cast<CKernelMachine*>(ui_classifier->get_classifier());
4727  ui_features->set_train_features(kernel_machine->get_kernel()->get_lhs());
4728  ui_features->set_test_features(kernel_machine->get_kernel()->get_rhs());
4729  ui_kernel->set_kernel(kernel_machine->get_kernel());
4730  }
4731 
4732  SG_FREE(filename);
4733  SG_FREE(type);
4734  return success;
4735 }
4736 
4737 bool CSGInterface::cmd_get_num_svms()
4738 {
4739  if (m_nrhs!=1 || !create_return_values(1))
4740  return false;
4741 
4742  set_int(ui_classifier->get_num_svms());
4743 
4744  return true;
4745 }
4746 
4747 
4748 bool CSGInterface::cmd_get_svm()
4749 {
4750  return cmd_get_classifier();
4751 }
4752 
4753 bool CSGInterface::cmd_set_svm()
4754 {
4755  if (m_nrhs!=3 || !create_return_values(0))
4756  return false;
4757 
4758  float64_t bias=get_real();
4759 
4760  float64_t* alphas=NULL;
4761  int32_t num_feat_alphas=0;
4762  int32_t num_vec_alphas=0;
4763  get_matrix(alphas, num_feat_alphas, num_vec_alphas);
4764 
4765  if (!alphas)
4766  SG_ERROR("No proper alphas given.\n")
4767  if (num_vec_alphas!=2)
4768  SG_ERROR("Not 2 vectors in alphas.\n")
4769 
4770  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4771  if (!svm)
4772  SG_ERROR("No SVM object available.\n")
4773 
4774  svm->create_new_model(num_feat_alphas);
4775  svm->set_bias(bias);
4776 
4777  int32_t num_support_vectors=svm->get_num_support_vectors();
4778  for (int32_t i=0; i<num_support_vectors; i++)
4779  {
4780  svm->set_alpha(i, alphas[i]);
4781  svm->set_support_vector(i, (int32_t) alphas[i+num_support_vectors]);
4782  }
4783  SG_FREE(alphas);
4784 
4785  return true;
4786 }
4787 
4788 bool CSGInterface::cmd_set_linear_classifier()
4789 {
4790  if (m_nrhs!=3 || !create_return_values(0))
4791  return false;
4792 
4793  float64_t bias=get_real();
4794 
4795  float64_t* w=NULL;
4796  int32_t len=0;
4797  get_vector(w, len);
4798 
4799  if (!len)
4800  SG_ERROR("No proper weight vector given.\n")
4801 
4802  CLinearMachine* c=(CLinearMachine*) ui_classifier->get_classifier();
4803  if (!c)
4804  SG_ERROR("No Linear Classifier object available.\n")
4805 
4806  c->set_w(SGVector<float64_t>(w, len));
4807  c->set_bias(bias);
4808  return true;
4809 }
4810 
4811 bool CSGInterface::cmd_get_svm_objective()
4812 {
4813  if (m_nrhs!=1 || !create_return_values(1))
4814  return false;
4815 
4816  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4817  if (!svm)
4818  SG_ERROR("No SVM set.\n")
4819 
4820  set_real(svm->get_objective());
4821 
4822  return true;
4823 }
4824 
4825 bool CSGInterface::cmd_compute_svm_primal_objective()
4826 {
4827  return do_compute_objective(SVM_PRIMAL);
4828 }
4829 
4830 bool CSGInterface::cmd_compute_svm_dual_objective()
4831 {
4832  return do_compute_objective(SVM_DUAL);
4833 }
4834 
4835 bool CSGInterface::cmd_compute_mkl_dual_objective()
4836 {
4837  return do_compute_objective(MKL_DUAL);
4838 }
4839 
4840 bool CSGInterface::cmd_compute_relative_mkl_duality_gap()
4841 {
4842  return do_compute_objective(MKL_RELATIVE_DUALITY_GAP);
4843 }
4844 
4845 bool CSGInterface::cmd_compute_absolute_mkl_duality_gap()
4846 {
4847  return do_compute_objective(MKL_ABSOLUTE_DUALITY_GAP);
4848 }
4849 
4850 bool CSGInterface::do_compute_objective(E_WHICH_OBJ obj)
4851 {
4852  if (m_nrhs!=1 || !create_return_values(1))
4853  return false;
4854 
4855  float64_t result=23.5;
4856 
4857  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4858  if (!svm)
4859  SG_ERROR("No SVM set.\n")
4860 
4861  CLabels* trainlabels=NULL;
4862  trainlabels=ui_labels->get_train_labels();
4863 
4864  if (!trainlabels)
4865  SG_ERROR("No trainlabels available.\n")
4866 
4867  CKernel* kernel=ui_kernel->get_kernel();
4868  if (!kernel)
4869  SG_ERROR("No kernel available.\n")
4870 
4871  if (!ui_kernel->is_initialized() || !kernel->has_features())
4872  SG_ERROR("Kernel not initialized.\n")
4873 
4874  ((CKernelMachine*) svm)->set_labels(trainlabels);
4875  ((CKernelMachine*) svm)->set_kernel(kernel);
4876 
4877 
4878  switch (obj)
4879  {
4880  case SVM_PRIMAL:
4881  result=svm->compute_svm_primal_objective();
4882  break;
4883  case SVM_DUAL:
4884  result=svm->compute_svm_dual_objective();
4885  break;
4886  case MKL_PRIMAL:
4888  result=((CMKL*) svm)->compute_mkl_primal_objective();
4889  break;
4890  case MKL_DUAL:
4892  result=((CMKL*) svm)->compute_mkl_dual_objective();
4893  break;
4894  case MKL_RELATIVE_DUALITY_GAP:
4895  {
4897  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4898  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4899  result=(primal-dual)/dual;
4900  }
4901  break;
4902  case MKL_ABSOLUTE_DUALITY_GAP:
4903  {
4905  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4906  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4907  result=dual-primal;
4908  }
4909  break;
4910  default:
4911  SG_SERROR("Error calling do_compute_objective\n")
4912  return false;
4913  };
4914 
4915  set_real(result);
4916  return true;
4917 }
4918 
4919 bool CSGInterface::cmd_train_classifier()
4920 {
4921  if (m_nrhs<1 || !create_return_values(0))
4922  return false;
4923 
4924  CMachine* classifier=ui_classifier->get_classifier();
4925  if (!classifier)
4926  SG_ERROR("No classifier available.\n")
4927 
4928  EMachineType type=classifier->get_classifier_type();
4929  switch (type)
4930  {
4931  case CT_LIGHT:
4932  case CT_LIGHTONECLASS:
4933  case CT_LIBSVM:
4934  case CT_SCATTERSVM:
4935  case CT_MPD:
4936  case CT_GPBT:
4937  case CT_CPLEXSVM:
4938  case CT_GMNPSVM:
4939  case CT_GNPPSVM:
4940  case CT_KERNELPERCEPTRON:
4941  case CT_LIBSVR:
4942  case CT_LIBSVMMULTICLASS:
4943  case CT_LIBSVMONECLASS:
4944  case CT_SVRLIGHT:
4945  case CT_LARANK:
4946  return ui_classifier->train_svm();
4947  case CT_MKLMULTICLASS:
4948  return ui_classifier->train_mkl_multiclass();
4949  case CT_MKLCLASSIFICATION:
4950  case CT_MKLREGRESSION:
4951  case CT_MKLONECLASS:
4952  return ui_classifier->train_mkl();
4953 
4955  return ui_classifier->train_krr();
4956 
4957  case CT_KNN:
4958  {
4959  if (m_nrhs<2)
4960  return false;
4961 
4962  int32_t k=get_int_from_int_or_str();
4963 
4964  return ui_classifier->train_knn(k);
4965  }
4966 
4967  case CT_KMEANS:
4968  {
4969  if (m_nrhs<3)
4970  return false;
4971 
4972  int32_t k=get_int_from_int_or_str();
4973  int32_t max_iter=get_int_from_int_or_str();
4974 
4975  return ui_classifier->train_clustering(k, max_iter);
4976  }
4977 
4978  case CT_HIERARCHICAL:
4979  {
4980  if (m_nrhs<2)
4981  return false;
4982 
4983  int32_t merges=get_int_from_int_or_str();
4984 
4985  return ui_classifier->train_clustering(merges);
4986  }
4987 
4988  case CT_LDA:
4989  {
4990  float64_t gamma=0;
4991  if (m_nrhs==2)
4992  gamma=get_real_from_real_or_str();
4993 
4994  return ui_classifier->train_linear(gamma);
4995  }
4996 
4997  case CT_PERCEPTRON:
4998  case CT_SVMLIN:
4999  case CT_SVMPERF:
5000  case CT_SVMOCAS:
5001  case CT_SVMSGD:
5002  case CT_LPM:
5003  case CT_LPBOOST:
5004  case CT_LIBLINEAR:
5005  return ui_classifier->train_linear();
5006 
5007  case CT_WDSVMOCAS:
5008  return ui_classifier->train_wdocas();
5009 
5010  default:
5011  SG_ERROR("Unknown classifier type %d.\n", type)
5012  }
5013 
5014  return false;
5015 }
5016 
5017 bool CSGInterface::cmd_do_auc_maximization()
5018 {
5019  if (m_nrhs!=2 || !create_return_values(0))
5020  return false;
5021 
5022  bool do_auc=get_bool_from_bool_or_str();
5023 
5024  return ui_classifier->set_do_auc_maximization(do_auc);
5025 }
5026 
5027 bool CSGInterface::cmd_set_perceptron_parameters()
5028 {
5029  if (m_nrhs!=3 || !create_return_values(0))
5030  return false;
5031 
5032  float64_t lernrate=get_real_from_real_or_str();
5033  int32_t maxiter=get_int_from_int_or_str();
5034 
5035  return ui_classifier->set_perceptron_parameters(lernrate, maxiter);
5036 }
5037 
5038 bool CSGInterface::cmd_set_svm_qpsize()
5039 {
5040  if (m_nrhs!=2 || !create_return_values(0))
5041  return false;
5042 
5043  int32_t qpsize=get_int_from_int_or_str();
5044 
5045  return ui_classifier->set_svm_qpsize(qpsize);
5046 }
5047 
5048 bool CSGInterface::cmd_set_svm_max_qpsize()
5049 {
5050  if (m_nrhs!=2 || !create_return_values(0))
5051  return false;
5052 
5053  int32_t max_qpsize=get_int_from_int_or_str();
5054 
5055  return ui_classifier->set_svm_max_qpsize(max_qpsize);
5056 }
5057 
5058 bool CSGInterface::cmd_set_svm_bufsize()
5059 {
5060  if (m_nrhs!=2 || !create_return_values(0))
5061  return false;
5062 
5063  int32_t bufsize=get_int_from_int_or_str();
5064 
5065  return ui_classifier->set_svm_bufsize(bufsize);
5066 }
5067 
5068 bool CSGInterface::cmd_set_svm_C()
5069 {
5070  if (m_nrhs<2 || !create_return_values(0))
5071  return false;
5072 
5073  float64_t C1=get_real_from_real_or_str();
5074  float64_t C2=C1;
5075 
5076  if (m_nrhs==3)
5077  C2=get_real_from_real_or_str();
5078 
5079  return ui_classifier->set_svm_C(C1, C2);
5080 }
5081 
5082 bool CSGInterface::cmd_set_svm_epsilon()
5083 {
5084  if (m_nrhs!=2 || !create_return_values(0))
5085  return false;
5086 
5087  float64_t epsilon=get_real_from_real_or_str();
5088 
5089  return ui_classifier->set_svm_epsilon(epsilon);
5090 }
5091 
5092 bool CSGInterface::cmd_set_svr_tube_epsilon()
5093 {
5094  if (m_nrhs!=2 || !create_return_values(0))
5095  return false;
5096 
5097  float64_t tube_epsilon=get_real_from_real_or_str();
5098 
5099  return ui_classifier->set_svr_tube_epsilon(tube_epsilon);
5100 }
5101 
5102 bool CSGInterface::cmd_set_svm_nu()
5103 {
5104  if (m_nrhs!=2 || !create_return_values(0))
5105  return false;
5106 
5107  float64_t nu=get_real_from_real_or_str();
5108 
5109  return ui_classifier->set_svm_nu(nu);
5110 }
5111 
5112 bool CSGInterface::cmd_set_svm_mkl_parameters()
5113 {
5114  if (m_nrhs<3 || m_nrhs>4 || !create_return_values(0))
5115  return false;
5116 
5117  float64_t weight_epsilon=get_real_from_real_or_str();
5118  float64_t C_mkl=get_real_from_real_or_str();
5119  float64_t mkl_norm=1.0;
5120 
5121  if (m_nrhs==4)
5122  mkl_norm=get_real_from_real_or_str();
5123 
5124  return ui_classifier->set_svm_mkl_parameters(weight_epsilon, C_mkl, mkl_norm);
5125 }
5126 
5127 bool CSGInterface::cmd_set_elasticnet_lambda()
5128 {
5129  if (m_nrhs!=2 || !create_return_values(0))
5130  return false;
5131  float64_t lambda=get_real_from_real_or_str();
5132  return ui_classifier->set_elasticnet_lambda(lambda);
5133 }
5134 
5135 bool CSGInterface::cmd_set_mkl_block_norm()
5136 {
5137  if (m_nrhs!=2 || !create_return_values(0))
5138  return false;
5139  float64_t bnorm=get_real_from_real_or_str();
5140  return ui_classifier->set_mkl_block_norm(bnorm);
5141 }
5142 
5143 
5144 bool CSGInterface::cmd_set_max_train_time()
5145 {
5146  if (m_nrhs!=2 || !create_return_values(0))
5147  return false;
5148 
5149  float64_t max_train_time=get_real_from_real_or_str();
5150 
5151  return ui_classifier->set_max_train_time(max_train_time);
5152 }
5153 
5154 bool CSGInterface::cmd_set_svm_shrinking_enabled()
5155 {
5156  if (m_nrhs!=2 || !create_return_values(0))
5157  return false;
5158 
5159  bool shrinking_enabled=get_bool_from_bool_or_str();
5160 
5161  return ui_classifier->set_svm_shrinking_enabled(shrinking_enabled);
5162 }
5163 
5164 bool CSGInterface::cmd_set_svm_batch_computation_enabled()
5165 {
5166  if (m_nrhs!=2 || !create_return_values(0))
5167  return false;
5168 
5169  bool batch_computation_enabled=get_bool_from_bool_or_str();
5170 
5171  return ui_classifier->set_svm_batch_computation_enabled(
5172  batch_computation_enabled);
5173 }
5174 
5175 bool CSGInterface::cmd_set_svm_linadd_enabled()
5176 {
5177  if (m_nrhs!=2 || !create_return_values(0))
5178  return false;
5179 
5180  bool linadd_enabled=get_bool_from_bool_or_str();
5181 
5182  return ui_classifier->set_svm_linadd_enabled(linadd_enabled);
5183 }
5184 
5185 bool CSGInterface::cmd_set_svm_bias_enabled()
5186 {
5187  if (m_nrhs!=2 || !create_return_values(0))
5188  return false;
5189 
5190  bool bias_enabled=get_bool_from_bool_or_str();
5191 
5192  return ui_classifier->set_svm_bias_enabled(bias_enabled);
5193 }
5194 
5195 bool CSGInterface::cmd_set_mkl_interleaved_enabled()
5196 {
5197  if (m_nrhs!=2 || !create_return_values(0))
5198  return false;
5199 
5200  bool interleaved_enabled=get_bool_from_bool_or_str();
5201 
5202  return ui_classifier->set_mkl_interleaved_enabled(interleaved_enabled);
5203 }
5204 
5205 bool CSGInterface::cmd_set_krr_tau()
5206 {
5207  if (m_nrhs!=2 || !create_return_values(0))
5208  return false;
5209 
5210  float64_t tau=get_real_from_real_or_str();
5211 
5212  return ui_classifier->set_krr_tau(tau);
5213 }
5214 
5215 
5216 /* Preproc */
5217 
5218 bool CSGInterface::cmd_add_preproc()
5219 {
5220  if (m_nrhs<2 || !create_return_values(0))
5221  return false;
5222 
5223  int32_t len=0;
5224  char* type=get_str_from_str_or_direct(len);
5225  CPreprocessor* preproc=NULL;
5226 
5227  if (strmatch(type, "NORMONE"))
5228  preproc=ui_preproc->create_generic(P_NORMONE);
5229  else if (strmatch(type, "LOGPLUSONE"))
5230  preproc=ui_preproc->create_generic(P_LOGPLUSONE);
5231  else if (strmatch(type, "SORTWORDSTRING"))
5232  preproc=ui_preproc->create_generic(P_SORTWORDSTRING);
5233  else if (strmatch(type, "SORTULONGSTRING"))
5234  preproc=ui_preproc->create_generic(P_SORTULONGSTRING);
5235  else if (strmatch(type, "DECOMPRESSCHARSTRING"))
5236  preproc=ui_preproc->create_generic(P_DECOMPRESSCHARSTRING);
5237  else if (strmatch(type, "SORTWORD"))
5238  preproc=ui_preproc->create_generic(P_SORTWORD);
5239 
5240  else if (strmatch(type, "PRUNEVARSUBMEAN"))
5241  {
5242  bool divide_by_std=false;
5243  if (m_nrhs==3)
5244  divide_by_std=get_bool_from_bool_or_str();
5245 
5246  preproc=ui_preproc->create_prunevarsubmean(divide_by_std);
5247  }
5248 
5249 #ifdef HAVE_LAPACK
5250  else if (strmatch(type, "PCA") && m_nrhs==4)
5251  {
5252  bool do_whitening=get_bool_from_bool_or_str();
5253  float64_t threshold=get_real_from_real_or_str();
5254 
5255  preproc=ui_preproc->create_pca(do_whitening, threshold);
5256  }
5257 #endif
5258 
5259  else
5261 
5262  SG_FREE(type);
5263  return ui_preproc->add_preproc(preproc);
5264 }
5265 
5266 bool CSGInterface::cmd_del_preproc()
5267 {
5268  if (m_nrhs!=1 || !create_return_values(0))
5269  return false;
5270 
5271  return ui_preproc->del_preproc();
5272 }
5273 
5274 bool CSGInterface::cmd_attach_preproc()
5275 {
5276  if (m_nrhs<2 || !create_return_values(0))
5277  return false;
5278 
5279  int32_t len=0;
5280  char* target=get_str_from_str_or_direct(len);
5281 
5282  bool do_force=false;
5283  if (m_nrhs==3)
5284  do_force=get_bool_from_bool_or_str();
5285 
5286  bool success=ui_preproc->attach_preproc(target, do_force);
5287 
5288  SG_FREE(target);
5289  return success;
5290 }
5291 
5292 bool CSGInterface::cmd_clean_preproc()
5293 {
5294  if (m_nrhs!=1 || !create_return_values(0))
5295  return false;
5296 
5297  return ui_preproc->clean_preproc();
5298 }
5299 
5300 /* Converter */
5301 
5302 bool CSGInterface::cmd_set_converter()
5303 {
5304  int32_t len=0;
5305  char* type=get_str_from_str_or_direct(len);
5306 
5307  if (strmatch(type, "lle"))
5308  {
5309  int32_t k = get_int_from_int_or_str();
5310  ui_converter->create_locallylinearembedding(k);
5311  return true;
5312  }
5313  if (strmatch(type, "npe"))
5314  {
5315  int32_t k = get_int_from_int_or_str();
5316  ui_converter->create_neighborhoodpreservingembedding(k);
5317  return true;
5318  }
5319  if (strmatch(type, "ltsa"))
5320  {
5321  int32_t k = get_int_from_int_or_str();
5322  ui_converter->create_localtangentspacealignment(k);
5323  return true;
5324  }
5325  if (strmatch(type, "lltsa"))
5326  {
5327  int32_t k = get_int_from_int_or_str();
5328  ui_converter->create_linearlocaltangentspacealignment(k);
5329  return true;
5330  }
5331  if (strmatch(type, "hlle"))
5332  {
5333  int32_t k = get_int_from_int_or_str();
5334  ui_converter->create_hessianlocallylinearembedding(k);
5335  return true;
5336  }
5337  if (strmatch(type, "laplacian_eigenmaps"))
5338  {
5339  int32_t k = get_int_from_int_or_str();
5340  int32_t width = get_real_from_real_or_str();
5341  ui_converter->create_laplacianeigenmaps(k,width);
5342  return true;
5343  }
5344  if (strmatch(type, "lpp"))
5345  {
5346  int32_t k = get_int_from_int_or_str();
5347  int32_t width = get_real_from_real_or_str();
5348  ui_converter->create_localitypreservingprojections(k,width);
5349  return true;
5350  }
5351  if (strmatch(type, "diffusion_maps"))
5352  {
5353  int32_t t = get_int_from_int_or_str();
5354  int32_t width = get_real_from_real_or_str();
5355  ui_converter->create_diffusionmaps(t,width);
5356  return true;
5357  }
5358  if (strmatch(type, "isomap"))
5359  {
5360  int32_t k = get_int_from_int_or_str();
5361  ui_converter->create_isomap(k);
5362  return true;
5363  }
5364  if (strmatch(type, "mds"))
5365  {
5366  ui_converter->create_multidimensionalscaling();
5367  return true;
5368  }
5369  if (strmatch(type, "jade"))
5370  {
5371  ui_converter->create_jade();
5372  return true;
5373  }
5374  return false;
5375 }
5376 
5377 bool CSGInterface::cmd_apply_converter()
5378 {
5379  if (m_nrhs!=1 || !create_return_values(1))
5380  return false;
5381 
5382  CDenseFeatures<float64_t>* conv_features = ui_converter->apply();
5383  SGMatrix<float64_t> converted_mat = conv_features->get_feature_matrix();
5384  set_matrix(converted_mat.matrix,converted_mat.num_rows,converted_mat.num_cols);
5385  return true;
5386 }
5387 
5388 bool CSGInterface::cmd_embed()
5389 {
5390  int32_t target_dim = get_int_from_int_or_str();
5391 
5392  if (m_nrhs!=1 || !create_return_values(1))
5393  return false;
5394 
5395  CDenseFeatures<float64_t>* embedding = ui_converter->embed(target_dim);
5396  SGMatrix<float64_t> embedding_matrix = embedding->get_feature_matrix();
5397  set_matrix(embedding_matrix.matrix,embedding_matrix.num_cols,embedding_matrix.num_rows);
5398  return true;
5399 }
5400 
5401 /* HMM */
5402 
5403 bool CSGInterface::cmd_new_plugin_estimator()
5404 {
5405  if (m_nrhs<2 || !create_return_values(0))
5406  return false;
5407 
5408  float64_t pos_pseudo=get_real_from_real_or_str();
5409  float64_t neg_pseudo=get_real_from_real_or_str();
5410 
5411  return ui_pluginestimate->new_estimator(pos_pseudo, neg_pseudo);
5412 }
5413 
5414 bool CSGInterface::cmd_train_estimator()
5415 {
5416  if (m_nrhs!=1 || !create_return_values(0))
5417  return false;
5418 
5419  return ui_pluginestimate->train();
5420 }
5421 
5422 bool CSGInterface::cmd_plugin_estimate_classify_example()
5423 {
5424  if (m_nrhs!=2 || !create_return_values(1))
5425  return false;
5426 
5427  int32_t idx=get_int();
5428  float64_t result=ui_pluginestimate->apply_one(idx);
5429 
5430  set_vector(&result, 1);
5431  return true;
5432 }
5433 
5434 bool CSGInterface::cmd_plugin_estimate_classify()
5435 {
5436  if (m_nrhs!=1 || !create_return_values(1))
5437  return false;
5438 
5439  CFeatures* feat=ui_features->get_test_features();
5440  if (!feat)
5441  SG_ERROR("No features found.\n")
5442 
5443  int32_t num_vec=feat->get_num_vectors();
5444  float64_t* result=SG_MALLOC(float64_t, num_vec);
5445  CLabels* labels=ui_pluginestimate->apply();
5446  for (int32_t i=0; i<num_vec; i++)
5447  result[i]=((CRegressionLabels*) labels)->get_label(i);
5448  SG_UNREF(labels);
5449 
5450  set_vector(result, num_vec);
5451  SG_FREE(result);
5452 
5453  return true;
5454 }
5455 
5456 bool CSGInterface::cmd_set_plugin_estimate()
5457 {
5458  if (m_nrhs!=3 || !create_return_values(0))
5459  return false;
5460 
5461  float64_t* emission_probs=NULL;
5462  int32_t num_probs=0;
5463  int32_t num_vec=0;
5464  get_matrix(emission_probs, num_probs, num_vec);
5465 
5466  if (num_vec!=2)
5467  SG_ERROR("Need at least 1 set of positive and 1 set of negative params.\n")
5468 
5469  float64_t* pos_params=emission_probs;
5470  float64_t* neg_params=&(emission_probs[num_probs]);
5471 
5472  float64_t* model_sizes=NULL;
5473  int32_t len=0;
5474  get_vector(model_sizes, len);
5475 
5476  int32_t seq_length=(int32_t) model_sizes[0];
5477  int32_t num_symbols=(int32_t) model_sizes[1];
5478  if (num_probs!=seq_length*num_symbols)
5479  SG_ERROR("Mismatch in number of emission probs and sequence length * number of symbols.\n")
5480 
5481  ui_pluginestimate->get_estimator()->set_model_params(
5482  pos_params, neg_params, seq_length, num_symbols);
5483 
5484  return true;
5485 }
5486 
5487 bool CSGInterface::cmd_get_plugin_estimate()
5488 {
5489  if (m_nrhs!=1 || !create_return_values(2))
5490  return false;
5491 
5492  float64_t* pos_params=NULL;
5493  float64_t* neg_params=NULL;
5494  int32_t num_params=0;
5495  int32_t seq_length=0;
5496  int32_t num_symbols=0;
5497 
5498  if (!ui_pluginestimate->get_estimator()->get_model_params(
5499  pos_params, neg_params, seq_length, num_symbols))
5500  return false;
5501 
5502  num_params=seq_length*num_symbols;
5503 
5504  float64_t* result=SG_MALLOC(float64_t, num_params*2);
5505  for (int32_t i=0; i<num_params; i++)
5506  result[i]=pos_params[i];
5507  for (int32_t i=0; i<num_params; i++)
5508  result[i+num_params]=neg_params[i];
5509 
5510  set_matrix(result, num_params, 2);
5511  SG_FREE(result);
5512 
5513  float64_t model_sizes[2];
5514  model_sizes[0]=(float64_t) seq_length;
5515  model_sizes[1]=(float64_t) num_symbols;
5516  set_vector(model_sizes, 2);
5517 
5518  return true;
5519 }
5520 
5521 bool CSGInterface::cmd_convergence_criteria()
5522 {
5523  if (m_nrhs<3 || !create_return_values(0))
5524  return false;
5525 
5526  int32_t num_iterations=get_int_from_int_or_str();
5527  float64_t epsilon=get_real_from_real_or_str();
5528 
5529  return ui_hmm->convergence_criteria(num_iterations, epsilon);
5530 }
5531 
5532 bool CSGInterface::cmd_normalize()
5533 {
5534  if (m_nrhs<2 || !create_return_values(0))
5535  return false;
5536 
5537  bool keep_dead_states=get_bool_from_bool_or_str();
5538 
5539  return ui_hmm->normalize(keep_dead_states);
5540 }
5541 
5542 bool CSGInterface::cmd_add_states()
5543 {
5544  if (m_nrhs<3 || !create_return_values(0))
5545  return false;
5546 
5547  int32_t num_states=get_int_from_int_or_str();
5548  float64_t value=get_real_from_real_or_str();
5549 
5550  return ui_hmm->add_states(num_states, value);
5551 }
5552 
5553 bool CSGInterface::cmd_permutation_entropy()
5554 {
5555  if (m_nrhs<3 || !create_return_values(0))
5556  return false;
5557 
5558  int32_t width=get_int_from_int_or_str();
5559  int32_t seq_num=get_int_from_int_or_str();
5560 
5561  return ui_hmm->permutation_entropy(width, seq_num);
5562 }
5563 
5564 bool CSGInterface::cmd_relative_entropy()
5565 {
5566  if (m_nrhs!=1 || !create_return_values(1))
5567  return false;
5568 
5569  float64_t* entropy=NULL;
5570  int32_t len=0;
5571  bool success=ui_hmm->relative_entropy(entropy, len);
5572  if (!success)
5573  return false;
5574 
5575  set_vector(entropy, len);
5576 
5577  SG_FREE(entropy);
5578  return true;
5579 }
5580 
5581 bool CSGInterface::cmd_entropy()
5582 {
5583  if (m_nrhs!=1 || !create_return_values(1))
5584  return false;
5585 
5586  float64_t* entropy=NULL;
5587  int32_t len=0;
5588  bool success=ui_hmm->entropy(entropy, len);
5589  if (!success)
5590  return false;
5591 
5592  set_vector(entropy, len);
5593 
5594  SG_FREE(entropy);
5595  return true;
5596 }
5597 
5598 bool CSGInterface::cmd_hmm_classify()
5599 {
5600  return do_hmm_classify(false, false);
5601 }
5602 
5603 bool CSGInterface::cmd_one_class_hmm_classify()
5604 {
5605  return do_hmm_classify(false, true);
5606 }
5607 
5608 bool CSGInterface::cmd_one_class_linear_hmm_classify()
5609 {
5610  return do_hmm_classify(true, true);
5611 }
5612 
5613 bool CSGInterface::do_hmm_classify(bool linear, bool one_class)
5614 {
5615  if (m_nrhs>1 || !create_return_values(1))
5616  return false;
5617 
5618  CFeatures* feat=ui_features->get_test_features();
5619  if (!feat)
5620  return false;
5621 
5622  int32_t num_vec=feat->get_num_vectors();
5623  CRegressionLabels* labels=NULL;
5624 
5625  if (linear) // must be one_class as well
5626  {
5627  labels=ui_hmm->linear_one_class_classify();
5628  }
5629  else
5630  {
5631  if (one_class)
5632  labels=ui_hmm->one_class_classify();
5633  else
5634  labels=ui_hmm->classify();
5635  }
5636  if (!labels)
5637  return false;
5638 
5639  float64_t* result=SG_MALLOC(float64_t, num_vec);
5640  for (int32_t i=0; i<num_vec; i++)
5641  result[i]=labels->get_label(i);
5642  SG_UNREF(labels);
5643 
5644  set_vector(result, num_vec);
5645  SG_FREE(result);
5646 
5647  return true;
5648 }
5649 
5650 bool CSGInterface::cmd_one_class_hmm_classify_example()
5651 {
5652  return do_hmm_classify_example(true);
5653 }
5654 
5655 bool CSGInterface::cmd_hmm_classify_example()
5656 {
5657  return do_hmm_classify_example(false);
5658 }
5659 
5660 bool CSGInterface::do_hmm_classify_example(bool one_class)
5661 {
5662  if (m_nrhs!=2 || !create_return_values(1))
5663  return false;
5664 
5665  int32_t idx=get_int();
5666  float64_t result=0;
5667 
5668  if (one_class)
5669  result=ui_hmm->one_class_classify_example(idx);
5670  else
5671  result=ui_hmm->classify_example(idx);
5672 
5673  set_real(result);
5674 
5675  return true;
5676 }
5677 
5678 bool CSGInterface::cmd_output_hmm()
5679 {
5680  if (m_nrhs!=1 || !create_return_values(0))
5681  return false;
5682 
5683  return ui_hmm->output_hmm();
5684 }
5685 
5686 bool CSGInterface::cmd_output_hmm_defined()
5687 {
5688  if (m_nrhs!=1 || !create_return_values(0))
5689  return false;
5690 
5691  return ui_hmm->output_hmm_defined();
5692 }
5693 
5694 bool CSGInterface::cmd_hmm_likelihood()
5695 {
5696  if (m_nrhs!=1 || !create_return_values(1))
5697  return false;
5698 
5699  CHMM* h=ui_hmm->get_current();
5700  if (!h)
5701  SG_ERROR("No HMM.\n")
5702 
5703  float64_t likelihood=h->model_probability();
5704  set_real(likelihood);
5705 
5706  return true;
5707 }
5708 
5709 bool CSGInterface::cmd_likelihood()
5710 {
5711  if (m_nrhs!=1 || !create_return_values(0))
5712  return false;
5713 
5714  return ui_hmm->likelihood();
5715 }
5716 
5717 bool CSGInterface::cmd_save_likelihood()
5718 {
5719  if (m_nrhs<2 || !create_return_values(0))
5720  return false;
5721 
5722  int32_t len=0;
5723  char* filename=get_str_from_str_or_direct(len);
5724 
5725  bool is_binary=false;
5726  if (m_nrhs==3)
5727  is_binary=get_bool_from_bool_or_str();
5728 
5729  bool success=ui_hmm->save_likelihood(filename, is_binary);
5730 
5731  SG_FREE(filename);
5732  return success;
5733 }
5734 
5735 bool CSGInterface::cmd_get_viterbi_path()
5736 {
5737  if (m_nrhs!=2 || !create_return_values(2))
5738  return false;
5739 
5740  int32_t dim=get_int();
5741  SG_DEBUG("dim: %f\n", dim)
5742 
5743  CHMM* h=ui_hmm->get_current();
5744  if (!h)
5745  return false;
5746 
5747  CFeatures* feat=ui_features->get_test_features();
5748  if (!feat || (feat->get_feature_class()!=C_STRING) ||
5749  (feat->get_feature_type()!=F_WORD))
5750  return false;
5751 
5753 
5754  int32_t num_feat=0;
5755  bool free_vec;
5756  uint16_t* vec=((CStringFeatures<uint16_t>*) feat)->get_feature_vector(dim, num_feat, free_vec);
5757  if (!vec || num_feat<=0)
5758  {
5759  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5760  return false;
5761  }
5762  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5763 
5764  SG_DEBUG("computing viterbi path for vector %d (length %d)\n", dim, num_feat)
5765  float64_t likelihood=0;
5766  T_STATES* path=h->get_path(dim, likelihood);
5767 
5768  set_vector(path, num_feat);
5769  SG_FREE(path);
5770  set_real(likelihood);
5771 
5772  return true;
5773 }
5774 
5775 bool CSGInterface::cmd_viterbi_train()
5776 {
5777  if (m_nrhs!=1 || !create_return_values(0))
5778  return false;
5779 
5780  return ui_hmm->viterbi_train();
5781 }
5782 
5783 bool CSGInterface::cmd_viterbi_train_defined()
5784 {
5785  if (m_nrhs!=1 || !create_return_values(0))
5786  return false;
5787 
5788  return ui_hmm->viterbi_train_defined();
5789 }
5790 
5791 bool CSGInterface::cmd_baum_welch_train()
5792 {
5793  if (m_nrhs!=1 || !create_return_values(0))
5794  return false;
5795 
5796  return ui_hmm->baum_welch_train();
5797 }
5798 
5799 bool CSGInterface::cmd_baum_welch_train_defined()
5800 {
5801  if (m_nrhs!=1 || !create_return_values(0))
5802  return false;
5803 
5804  return ui_hmm->baum_welch_train_defined();
5805 }
5806 
5807 
5808 bool CSGInterface::cmd_baum_welch_trans_train()
5809 {
5810  if (m_nrhs!=1 || !create_return_values(0))
5811  return false;
5812 
5813  return ui_hmm->baum_welch_trans_train();
5814 }
5815 
5816 bool CSGInterface::cmd_linear_train()
5817 {
5818  if (m_nrhs<1 || !create_return_values(0))
5819  return false;
5820 
5821  if (m_nrhs==2)
5822  {
5823  int32_t len=0;
5824  char* align=get_str_from_str_or_direct(len);
5825 
5826  bool success=ui_hmm->linear_train(align[0]);
5827 
5828  SG_FREE(align);
5829  return success;
5830  }
5831  else
5832  return ui_hmm->linear_train();
5833 }
5834 
5835 bool CSGInterface::cmd_save_path()
5836 {
5837  if (m_nrhs<2 || !create_return_values(0))
5838  return false;
5839 
5840  int32_t len=0;
5841  char* filename=get_str_from_str_or_direct(len);
5842 
5843  bool is_binary=false;
5844  if (m_nrhs==3)
5845  is_binary=get_bool_from_bool_or_str();
5846 
5847  bool success=ui_hmm->save_path(filename, is_binary);
5848 
5849  SG_FREE(filename);
5850  return success;
5851 }
5852 
5853 bool CSGInterface::cmd_append_hmm()
5854 {
5855  if (m_nrhs!=5 || !create_return_values(0))
5856  return false;
5857 
5858  CHMM* old_h=ui_hmm->get_current();
5859  if (!old_h)
5860  SG_ERROR("No current HMM set.\n")
5861 
5862  float64_t* p=NULL;
5863  int32_t N_p=0;
5864  get_vector(p, N_p);
5865 
5866  float64_t* q=NULL;
5867  int32_t N_q=0;
5868  get_vector(q, N_q);
5869 
5870  float64_t* a=NULL;
5871  int32_t M_a=0;
5872  int32_t N_a=0;
5873  get_matrix(a, M_a, N_a);
5874  int32_t N=N_a;
5875 
5876  float64_t* b=NULL;
5877  int32_t M_b=0;
5878  int32_t N_b=0;
5879  get_matrix(b, M_b, N_b);
5880  int32_t M=N_b;
5881 
5882  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5883  {
5884  SG_ERROR("Model matrices not matching in size.\n"
5885  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5886  N_p, N_q, N_a, M_a, N_b, M_b);
5887  }
5888 
5889  CHMM* h=new CHMM(N, M, NULL, ui_hmm->get_pseudo());
5890  int32_t i,j;
5891 
5892  for (i=0; i<N; i++)
5893  {
5894  h->set_p(i, p[i]);
5895  h->set_q(i, q[i]);
5896  }
5897 
5898  for (i=0; i<N; i++)
5899  for (j=0; j<N; j++)
5900  h->set_a(i,j, a[i+j*N]);
5901 
5902  for (i=0; i<N; i++)
5903  for (j=0; j<M; j++)
5904  h->set_b(i,j, b[i+j*N]);
5905 
5906  old_h->append_model(h);
5907  SG_UNREF(h);
5908 
5909  return true;
5910 }
5911 
5912 bool CSGInterface::cmd_append_model()
5913 {
5914  if (m_nrhs<2 || !create_return_values(0))
5915  return false;
5916  if (m_nrhs>2 && m_nrhs!=4)
5917  return false;
5918 
5919  int32_t len=0;
5920  char* filename=get_str_from_str_or_direct(len);
5921  int32_t base1=-1;
5922  int32_t base2=-1;
5923  if (m_nrhs>2)
5924  {
5925  base1=get_int_from_int_or_str();
5926  base2=get_int_from_int_or_str();
5927  }
5928 
5929  bool success=ui_hmm->append_model(filename, base1, base2);
5930 
5931  SG_FREE(filename);
5932  return success;
5933 }
5934 
5935 bool CSGInterface::cmd_new_hmm()
5936 {
5937  if (m_nrhs!=3 || !create_return_values(0))
5938  return false;
5939 
5940  int32_t n=get_int_from_int_or_str();
5941  int32_t m=get_int_from_int_or_str();
5942 
5943  return ui_hmm->new_hmm(n, m);
5944 }
5945 
5946 bool CSGInterface::cmd_load_hmm()
5947 {
5948  if (m_nrhs!=2 || !create_return_values(0))
5949  return false;
5950 
5951  int32_t len=0;
5952  char* filename=get_str_from_str_or_direct(len);
5953 
5954  bool success=ui_hmm->load(filename);
5955 
5956  SG_FREE(filename);
5957  return success;
5958 }
5959 
5960 bool CSGInterface::cmd_save_hmm()
5961 {
5962  if (m_nrhs<2 || !create_return_values(0))
5963  return false;
5964 
5965  int32_t len=0;
5966  char* filename=get_str_from_str_or_direct(len);
5967 
5968  bool is_binary=false;
5969  if (m_nrhs==3)
5970  is_binary=get_bool_from_bool_or_str();
5971 
5972  bool success=ui_hmm->save(filename, is_binary);
5973 
5974  SG_FREE(filename);
5975  return success;
5976 }
5977 
5978 bool CSGInterface::cmd_set_hmm()
5979 {
5980  if (m_nrhs!=5 || !create_return_values(0))
5981  return false;
5982 
5983  float64_t* p=NULL;
5984  int32_t N_p=0;
5985  get_vector(p, N_p);
5986 
5987  float64_t* q=NULL;
5988  int32_t N_q=0;
5989  get_vector(q, N_q);
5990 
5991  float64_t* a=NULL;
5992  int32_t M_a=0;
5993  int32_t N_a=0;
5994  get_matrix(a, M_a, N_a);
5995  int32_t N=N_a;
5996 
5997  float64_t* b=NULL;
5998  int32_t M_b=0;
5999  int32_t N_b=0;
6000  get_matrix(b, M_b, N_b);
6001  int32_t M=N_b;
6002 
6003  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
6004  {
6005  SG_ERROR("Model matrices not matching in size.\n"
6006  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
6007  N_p, N_q, N_a, M_a, N_b, M_b);
6008  }
6009 
6010  CHMM* current=ui_hmm->get_current();
6011  if (!current)
6012  SG_ERROR("Need a previously created HMM.\n")
6013 
6014  int32_t i,j;
6015 
6016  for (i=0; i<N; i++)
6017  {
6018  current->set_p(i, p[i]);
6019  current->set_q(i, q[i]);
6020  }
6021 
6022  for (i=0; i<N; i++)
6023  for (j=0; j<N; j++)
6024  current->set_a(i,j, a[i+j*N]);
6025 
6026  for (i=0; i<N; i++)
6027  for (j=0; j<M; j++)
6028  current->set_b(i,j, b[i+j*N]);
6029 
6030  CStringFeatures<uint16_t>* sf = ((CStringFeatures<uint16_t>*) (ui_features->get_train_features()));
6031  current->set_observations(sf);
6032 
6033  return true;
6034 }
6035 
6036 bool CSGInterface::cmd_set_hmm_as()
6037 {
6038  if (m_nrhs!=2 || !create_return_values(0))
6039  return false;
6040 
6041  int32_t len=0;
6042  char* target=get_str_from_str_or_direct(len);
6043 
6044  bool success=ui_hmm->set_hmm_as(target);
6045 
6046  SG_FREE(target);
6047  return success;
6048 }
6049 
6050 bool CSGInterface::cmd_set_chop()
6051 {
6052  if (m_nrhs!=2 || !create_return_values(0))
6053  return false;
6054 
6055  float64_t value=get_real_from_real_or_str();
6056  return ui_hmm->chop(value);
6057 }
6058 
6059 bool CSGInterface::cmd_set_pseudo()
6060 {
6061  if (m_nrhs!=2 || !create_return_values(0))
6062  return false;
6063 
6064  float64_t value=get_real_from_real_or_str();
6065  return ui_hmm->set_pseudo(value);
6066 }
6067 
6068 bool CSGInterface::cmd_load_definitions()
6069 {
6070  if (m_nrhs<2 || !create_return_values(0))
6071  return false;
6072 
6073  int32_t len=0;
6074  char* filename=get_str_from_str_or_direct(len);
6075 
6076  bool do_init=false;
6077  if (m_nrhs==3)
6078  do_init=get_bool_from_bool_or_str();
6079 
6080  bool success=ui_hmm->load_definitions(filename, do_init);
6081 
6082  SG_FREE(filename);
6083  return success;
6084 }
6085 
6086 bool CSGInterface::cmd_get_hmm()
6087 {
6088  if (m_nrhs!=1 || !create_return_values(4))
6089  return false;
6090 
6091  CHMM* h=ui_hmm->get_current();
6092  if (!h)
6093  return false;
6094 
6095  int32_t N=h->get_N();
6096  int32_t M=h->get_M();
6097  int32_t i=0;
6098  int32_t j=0;
6099  float64_t* p=SG_MALLOC(float64_t, N);
6100  float64_t* q=SG_MALLOC(float64_t, N);
6101 
6102  for (i=0; i<N; i++)
6103  {
6104  p[i]=h->get_p(i);
6105  q[i]=h->get_q(i);
6106  }
6107 
6108  set_vector(p, N);
6109  SG_FREE(p);
6110  set_vector(q, N);
6111  SG_FREE(q);
6112 
6113  float64_t* a=SG_MALLOC(float64_t, N*N);
6114  for (i=0; i<N; i++)
6115  for (j=0; j<N; j++)
6116  a[i+j*N]=h->get_a(i, j);
6117  set_matrix(a, N, N);
6118  SG_FREE(a);
6119 
6120  float64_t* b=SG_MALLOC(float64_t, N*M);
6121  for (i=0; i<N; i++)
6122  for (j=0; j<M; j++)
6123  b[i+j*N]=h->get_b(i, j);
6124  set_matrix(b, N, M);
6125  SG_FREE(b);
6126 
6127  return true;
6128 }
6129 
6130 bool CSGInterface::cmd_best_path()
6131 {
6132  if (m_nrhs!=3 || !create_return_values(0))
6133  return false;
6134 
6135  int32_t from=get_int_from_int_or_str();
6136  int32_t to=get_int_from_int_or_str();
6137 
6138  return ui_hmm->best_path(from, to);
6139 }
6140 
6141 bool CSGInterface::cmd_best_path_2struct()
6142 {
6143  if (m_nrhs!=12 || !create_return_values(3))
6144  return false;
6145 
6146  SG_ERROR("Sorry, this parameter list is awful!\n")
6147 
6148  return true;
6149 }
6150 
6151 void CSGInterface::get_vector(bool*& vector, int32_t& len)
6152 {
6153  int32_t* int_vector;
6154  get_vector(int_vector, len);
6155 
6156  ASSERT(len>0)
6157  vector= SG_MALLOC(bool, len);
6158 
6159  for (int32_t i=0; i<len; i++)
6160  vector[i]= (int_vector[i]!=0);
6161 
6162  SG_FREE(int_vector);
6163 }
6164 
6165 void CSGInterface::set_vector(const bool* vector, int32_t len)
6166 {
6167  int32_t* int_vector = SG_MALLOC(int32_t, len);
6168  for (int32_t i=0;i<len;i++)
6169  {
6170  if (vector[i])
6171  int_vector[i]=1;
6172  else
6173  int_vector[i]=0;
6174  }
6175  set_vector(int_vector,len);
6176  SG_FREE(int_vector);
6177 }
6178 
6179 bool CSGInterface::cmd_set_plif_struct()
6180 {
6181  // ARG 2
6182  int32_t Nid=0;
6183  int32_t* ids;
6184  get_vector(ids,Nid);
6185 
6186  // ARG 3
6187  int32_t Nname=0;
6188  int32_t Mname=0;
6189  SGString<char>* names;
6190  get_string_list(names, Nname,Mname);
6191 
6192  // ARG 4
6193  int32_t Nlimits=0;
6194  int32_t Mlimits=0;
6195  float64_t* all_limits;
6196  get_matrix(all_limits, Mlimits, Nlimits);
6197 
6198  // ARG 5
6199  int32_t Npenalties=0;
6200  int32_t Mpenalties=0;
6201  float64_t* all_penalties;
6202  get_matrix(all_penalties, Mpenalties, Npenalties);
6203 
6204  // ARG 6
6205  int32_t Ntransform=0;
6206  int32_t Mtransform=0;
6207  SGString<char>* all_transform;
6208  get_string_list(all_transform, Ntransform, Mtransform);
6209 
6210  // ARG 7
6211  int32_t Nmin=0;
6212  float64_t* min_values;
6213  get_vector(min_values,Nmin);
6214 
6215  // ARG 8
6216  int32_t Nmax=0;
6217  float64_t* max_values;
6218  get_vector(max_values,Nmax);
6219 
6220  // ARG 9
6221  int32_t Ncache=0;
6222  bool* all_use_cache;
6223  get_vector(all_use_cache,Ncache);
6224 
6225  // ARG 10
6226  int32_t Nsvm=0;
6227  int32_t* all_use_svm;
6228  get_vector(all_use_svm,Nsvm);
6229 
6230  // ARG 11
6231  int32_t Ncalc=0;
6232  bool* all_do_calc;
6233  get_vector(all_do_calc,Ncalc);
6234 
6235  if (Ncalc!=Nsvm)
6236  SG_ERROR("Ncalc!=Nsvm, Ncalc:%i, Nsvm:%i\n",Ncalc,Nsvm)
6237  if (Ncalc!=Ncache)
6238  SG_ERROR("Ncalc!=Ncache, Ncalc:%i, Ncache:%i\n",Ncalc,Ncache)
6239  if (Ncalc!=Ntransform)
6240  SG_ERROR("Ncalc!=Ntransform, Ncalc:%i, Ntransform:%i\n",Ncalc,Ntransform)
6241  if (Ncalc!=Nmin)
6242  SG_ERROR("Ncalc!=Nmin, Ncalc:%i, Nmin:%i\n",Ncalc,Nmin)
6243  if (Ncalc!=Nmax)
6244  SG_ERROR("Ncalc!=Nmax, Ncalc:%i, Nmax:%i\n",Ncalc,Nmax)
6245  if (Ncalc!=Npenalties)
6246  SG_ERROR("Ncalc!=Npenalties, Ncalc:%i, Npenalties:%i\n",Ncalc,Npenalties)
6247  if (Ncalc!=Nlimits)
6248  SG_ERROR("Ncalc!=Nlimits, Ncalc:%i, Nlimits:%i\n",Ncalc,Nlimits)
6249  if (Ncalc!=Nname)
6250  SG_ERROR("Ncalc!=Nname, Ncalc:%i, Nname:%i\n",Ncalc,Nname)
6251  if (Ncalc!=Nid)
6252  SG_ERROR("Ncalc!=Nid, Ncalc:%i, Nid:%i\n",Ncalc,Nid)
6253  if (Mlimits!=Mpenalties)
6254  SG_ERROR("Mlimits!=Mpenalties, Mlimits:%i, Mpenalties:%i\n",Mlimits,Mpenalties)
6255 
6256  int32_t N = Ncalc;
6257  int32_t M = Mlimits;
6258  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6259  pm->create_plifs(N, M);
6260  pm->set_plif_ids(SGVector<int32_t>(ids, N));
6261  pm->set_plif_min_values(SGVector<float64_t>(min_values, N));
6262  pm->set_plif_max_values(SGVector<float64_t>(max_values, N));
6263  pm->set_plif_use_cache(SGVector<bool>(all_use_cache, N));
6264  pm->set_plif_use_svm(SGVector<int32_t>(all_use_svm, N));
6265  pm->set_plif_limits(SGMatrix<float64_t>(all_limits, N, M));
6266  pm->set_plif_penalties(SGMatrix<float64_t>(all_penalties, N, M));
6267  pm->set_plif_names(names, N);
6268  pm->set_plif_transform_type(all_transform, N);
6269 
6270  SG_FREE(names);
6271  SG_FREE(all_transform);
6272  SG_FREE(all_do_calc);
6273 
6274  return true;
6275 }
6276 
6277 bool CSGInterface::cmd_get_plif_struct()
6278 {
6279  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6280  CPlif** PEN = pm->get_PEN();
6281  int32_t N = pm->get_num_plifs();
6282  int32_t M = pm->get_num_limits();
6283 
6284 
6285  int32_t* ids = SG_MALLOC(int32_t, N);
6286  float64_t* max_values = SG_MALLOC(float64_t, N);
6287  float64_t* min_values = SG_MALLOC(float64_t, N);
6288  SGString<char>* names = SG_MALLOC(SGString<char>, N);
6289  SGString<char>* all_transform = SG_MALLOC(SGString<char>, N);
6290  float64_t* all_limits = SG_MALLOC(float64_t, N*M);
6291  float64_t* all_penalties = SG_MALLOC(float64_t, N*M);
6292  bool* all_use_cache = SG_MALLOC(bool, N);
6293  int32_t* all_use_svm = SG_MALLOC(int32_t, N);
6294  bool* all_do_calc = SG_MALLOC(bool, N);
6295  for (int32_t i=0;i<N;i++)
6296  {
6297  ids[i]=PEN[i]->get_id();
6298  names[i].string = PEN[i]->get_plif_name();
6299  names[i].slen = strlen(PEN[i]->get_plif_name());
6300  SGVector<float64_t> limits = PEN[i]->get_plif_limits();
6301  SGVector<float64_t> penalties = PEN[i]->get_plif_penalties();
6302  for (int32_t j=0;j<M;j++)
6303  {
6304  all_limits[i*M+j]=limits[j];
6305  all_penalties[i*M+j]=penalties[j];
6306  }
6307  all_transform[i].string = (char*) PEN[i]->get_transform_type();
6308  all_transform[i].slen = strlen(PEN[i]->get_transform_type());
6309  min_values[i]=PEN[i]->get_min_value();
6310  max_values[i]=PEN[i]->get_max_value();
6311  all_use_cache[i]=PEN[i]->get_use_cache();
6312  all_use_svm[i]=PEN[i]->get_use_svm();
6313  all_do_calc[i]=PEN[i]->get_do_calc();
6314 
6315  }
6316  set_vector(ids,N);
6317  set_string_list(names, N);
6318  set_matrix(all_limits, M, N);
6319  set_matrix(all_penalties, M, N);
6320  set_string_list(all_transform, N);
6321  set_vector(min_values,N);
6322  set_vector(max_values,N);
6323  set_vector(all_use_cache,N);
6324  set_vector(all_use_svm,N);
6325  set_vector(all_do_calc,N);
6326 
6327  SG_FREE(ids);
6328  SG_FREE(max_values);
6329  SG_FREE(min_values);
6330  SG_FREE(names);
6331  SG_FREE(all_transform);
6332  SG_FREE(all_limits);
6333  SG_FREE(all_penalties);
6334  SG_FREE(all_use_cache);
6335  SG_FREE(all_use_svm);
6336  SG_FREE(all_do_calc);
6337 
6338  return true;
6339 }
6340 /*bool CSGInterface::cmd_signals_set_model()
6341 {
6342  // ARG 1
6343  int32_t len=0;
6344  char* filename;
6345  filename = get_string(len);
6346 
6347  CTrainPredMaster* tpm = new CTrainPredMaster(ui_kernel);
6348 
6349  tpm->read_models_from_file(filename);
6350 
6351  return true;
6352  }*/
6353 bool CSGInterface::cmd_signals_set_positions()
6354 {
6355  return true;
6356 }
6357 bool CSGInterface::cmd_signals_set_labels()
6358 {
6359  return true;
6360 }
6361 bool CSGInterface::cmd_signals_set_split()
6362 {
6363  return true;
6364 }
6365 bool CSGInterface::cmd_signals_set_train_mask()
6366 {
6367  return true;
6368 }
6369 bool CSGInterface::cmd_signals_add_feature()
6370 {
6371  return true;
6372 }
6373 bool CSGInterface::cmd_signals_add_kernel()
6374 {
6375  return true;
6376 }
6377 bool CSGInterface::cmd_signals_run()
6378 {
6379  return true;
6380 }
6381 
6382 bool CSGInterface::cmd_init_dyn_prog()
6383 {
6384  //ARG 1
6385  int32_t num_svms=get_int();
6386 
6387  CDynProg* h=new CDynProg(num_svms);
6388  ui_structure->set_dyn_prog(h);
6389  return true;
6390 }
6391 
6392 bool CSGInterface::cmd_clean_up_dyn_prog()
6393 {
6394  return ui_structure->cleanup();
6395 }
6396 
6397 bool CSGInterface::cmd_set_model()
6398 {
6399 
6400  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6401 
6402  CDynProg* h = ui_structure->get_dyn_prog();
6403  int32_t num_svms = h->get_num_svms();
6404  //CDynProg* h=new CDynProg(Nweights/* = num_svms */);
6405 
6406  //ARG 1
6407  // transition pointers
6408  // link transitions to length, content, frame (and tiling)
6409  // plifs (#states x #states x 3 or 4)
6410  int32_t numDim=0;
6411  int32_t* Dim=0;
6412  float64_t* penalties_array=NULL;
6413  get_ndarray(penalties_array,Dim,numDim);
6414  ASSERT(numDim==3)
6415  ASSERT(Dim[0]==Dim[1])
6416 
6417  if (!pm->compute_plif_matrix(SGNDArray<float64_t>(penalties_array, Dim, numDim, false)))
6418  SG_ERROR("error computing plif matrix\n")
6419  ui_structure->set_num_states(Dim[0]);
6420  SG_FREE(penalties_array);
6421 
6422  // ARG 2
6423  // bool-> determines if orf information should be used
6424  bool use_orf = get_bool();
6425  ui_structure->set_use_orf(use_orf);
6426 
6427  // ARG 3
6428  // determines for which contents which orf should be used (#contents x 2)
6429  int32_t Nmod=0;
6430  int32_t Mmod=0;
6431  int32_t* mod_words;
6432  get_matrix(mod_words, Nmod,Mmod);
6433  if (Nmod != num_svms)
6434  SG_ERROR("should be equal: Nmod: %i, num_svms: %i\n",Nmod,num_svms)
6435  ASSERT(Mmod == 2)
6436  h->init_mod_words_array(SGMatrix<int32_t>(mod_words, Nmod, Mmod));
6437 
6438  // ARG 4
6439  // links: states -> signal plifs (#states x 2)
6440  int32_t num_states=0;
6441  int32_t feat_dim3=0;
6442  int32_t* state_signals;
6443  get_matrix(state_signals,num_states,feat_dim3);
6444  ASSERT(num_states==Dim[0])
6445  pm->compute_signal_plifs(SGMatrix<int32_t>(state_signals, feat_dim3, num_states));
6446 
6447 
6448  // ARG 5
6449  // ORF info (#states x 2)
6450  int32_t Norf=0;
6451  int32_t Morf=0;
6452  int32_t* orf_info;
6453  get_matrix(orf_info,Norf,Morf);
6454  ASSERT(Norf==num_states)
6455  ASSERT(Morf==2)
6456 
6457  ui_structure->set_orf_info(orf_info, Norf, Morf);
6458  h->set_orf_info(SGMatrix<int32_t>(orf_info, Norf, Morf));
6459 
6460  h->set_num_states(num_states) ;
6461 
6462  return true;
6463 }
6464 
// Reads a sequence string, a vector of feature positions and a content-SVM
// weight matrix from the argument list, stores the weights in ui_structure
// and passes positions, sequence and weights on to the current CDynProg.
// Returns true once all calls have been issued; a missing DynProg object is
// reported through SG_ERROR.
// NOTE(review): the embedded listing numbers jump 6499 -> 6501 and
// 6502 -> 6504, so statements may be missing from this copy - confirm
// against the upstream source before editing the logic.
6465 bool CSGInterface::cmd_precompute_content_svms()
6466 {
6467 
6468  // ARG 1
6469  int32_t seq_len=0;
6470  char* seq;
6471  seq = get_string(seq_len);
6472 
6473  // ARG 2
6474  // all feature positions
6475  int32_t Npos=0;
6476  int32_t* all_pos;
6477  get_vector(all_pos, Npos);
6478 
6479  //ARG 3
6480  // content svm weights
6481  int32_t Nweights=0;
6482  int32_t num_svms=0;
6483  float64_t* weights;
6484  get_matrix(weights, Nweights, num_svms);
      // A row count other than 5440 only produces a printed notice (SG_PRINT);
      // processing continues regardless.
6485  if (Nweights!=5440)
6486  SG_PRINT("Dimension mismatch: got %i, expect %i\n", Nweights, 5440)
6487  ui_structure->set_content_svm_weights(weights, Nweights, num_svms);
6488 
      // h is dereferenced below, so it must have been created beforehand.
6489  CDynProg* h = ui_structure->get_dyn_prog();
6490  if (!h)
6491  SG_ERROR("no DynProg object found, use init_dyn_prog first\n")
6492 
6493 
6494  //float64_t* weights = ui_structure->get_content_svm_weights();
6495  //int32_t Mweights = h->get_num_svms();
6496  //int32_t Nweights = ui_structure->get_num_svm_weights();
      // The raw buffers are wrapped in SGVector/SGMatrix objects for the calls
      // below and are not freed in this function.
      // NOTE(review): presumably the wrappers take ownership - confirm.
6497  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6498  h->set_gene_string(SGVector<char>(seq, seq_len));
6499  h->create_word_string();
6501  h->init_content_svm_value_array(num_svms);
6502  h->set_dict_weights(SGMatrix<float64_t>(weights, Nweights, num_svms));
6504  SG_DEBUG("precompute_content_svms done\n")
6505  return true;
6506 }
6507 
6508 bool CSGInterface::cmd_get_lin_feat()
6509 {
6510  CDynProg* h = ui_structure->get_dyn_prog();
6511  if (!h)
6512  SG_ERROR("no DynProg object found, use set_model first\n")
6513 
6514 
6515  int32_t dim1, dim2 = 0;
6516  float64_t* lin_feat = h->get_lin_feat(dim1, dim2);
6517 
6518  set_matrix(lin_feat, dim1, dim2);
6519 
6520  return true;
6521 }
// Reads a sequence string, a vector of positions and a matrix of linear
// features, checks that the number of positions equals the matrix' second
// extent and forwards all three to the current CDynProg.
// Returns true on success; the mismatch branch contains a `return false`.
// NOTE(review): the embedded listing numbers jump 6557 -> 6559, so a
// statement may be missing from this copy - confirm against upstream.
6522 bool CSGInterface::cmd_set_lin_feat()
6523 {
6524  // ARG 1
6525  int32_t Nseq=0;
6526  char* seq;
6527  seq = get_string(Nseq);
6528 
6529  // ARG 2
6530  // all feature positions
6531  int32_t Npos=0;
6532  int32_t* all_pos;
6533  get_vector(all_pos, Npos);
6534 
6535  //ARG 3
6536  //
6537  int32_t num_svms, seq_len;
6538  float64_t* lin_feat=NULL;
6539  get_matrix(lin_feat, num_svms, seq_len);
6540 
6541  if (Npos!=seq_len)
6542  {
6543  SG_ERROR("Dimension mismatch: got %i positions and (%ix%i) values\n", Npos, num_svms, seq_len)
6544 
      // NOTE(review): this cleanup only runs if SG_ERROR returns to the
      // caller - confirm whether it does.
6545  SG_FREE(lin_feat);
6546  SG_FREE(seq);
6547  SG_FREE(all_pos);
6548 
6549  return false ;
6550  }
6551 
6552  CDynProg* h = ui_structure->get_dyn_prog();
6553  if (!h)
6554  SG_ERROR("no DynProg object found, use set_model first\n")
6555 
      // all_pos and seq are wrapped in SGVector objects and are not freed
      // here; lin_feat is passed as a raw pointer and freed right after.
6556  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6557  h->set_gene_string(SGVector<char>(seq, Nseq));
6559  h->init_content_svm_value_array(num_svms);
6560  h->set_lin_feat(lin_feat, num_svms, seq_len);
6561 
6562  SG_FREE(lin_feat);
6563 
6564  return true;
6565 }
6566 bool CSGInterface::cmd_long_transition_settings()
6567 {
6568  bool use_long_transitions = get_bool();
6569  int32_t threshold = get_int();
6570  int32_t max_len = get_int();
6571 
6572  CDynProg* h = ui_structure->get_dyn_prog();
6573  if (!h)
6574  SG_ERROR("no DynProg object found, use set_model first\n")
6575 
6576  h->long_transition_settings(use_long_transitions, threshold, max_len);
6577 
6578  return true;
6579 }
6580 bool CSGInterface::cmd_set_feature_matrix()
6581 {
6582  int32_t num_states = ui_structure->get_num_states();
6583 
6584  //ARG 1
6585  // feature matrix (#states x #feature_positions x max_num_signals)
6586  int32_t* Dims=0;
6587  int32_t numDims=0;
6588  float64_t* features = NULL;
6589  get_ndarray(features, Dims, numDims);
6590 
6591  if (numDims!=3)
6592  SG_ERROR("expected a 3 dimensional array, got %i dimensions\n", numDims)
6593  if (Dims[0]!=num_states)
6594  SG_ERROR("number of rows (%i) not equal number of states (%i)\n",Dims[0], num_states)
6595  ASSERT(ui_structure->set_feature_matrix(features, Dims))
6596 
6597  ASSERT(ui_structure->set_feature_dims(Dims))
6598 
6599  SG_FREE(features);
6600  SG_FREE(Dims);
6601 
6602  return true;
6603 }
6604 bool CSGInterface::cmd_set_feature_matrix_sparse()
6605 {
6606  int32_t num_pos = ui_structure->get_num_positions();
6607  int32_t num_states = ui_structure->get_num_states();
6608 
6609  //ARG 1
6610  // feature matrix (#states x #feature_positions x max_num_signals)
6611  int32_t dim11, dim12 ;
6612  SGSparseVector<float64_t> *features1=NULL ;
6613  get_sparse_matrix(features1, dim11, dim12);
6614 
6615  int32_t dim21, dim22 ;
6616  SGSparseVector<float64_t> *features2=NULL ;
6617  get_sparse_matrix(features2, dim21, dim22);
6618 
6619  ASSERT(dim11==dim21)
6620  ASSERT(dim12==dim22)
6621 
6622  int32_t *Dims = SG_MALLOC(int32_t, 3);
6623  Dims[0]=dim11 ;
6624  Dims[1]=dim12 ;
6625  Dims[2]=2 ;
6626 
6627  ASSERT(Dims[0]==num_states)
6628  ASSERT(Dims[1]==num_pos)
6629 
6630  ASSERT(ui_structure->set_feature_matrix_sparse(features1, features2, Dims))
6631  ASSERT(ui_structure->set_feature_dims(Dims))
6632 
6633  SG_FREE(features1);
6634  SG_FREE(features2);
6635  SG_FREE(Dims);
6636 
6637  return true;
6638 }
6639 bool CSGInterface::cmd_init_intron_list()
6640 {
6641  //ARG1 start_positions
6642  int32_t Nstart_positions;
6643  int32_t* start_positions;
6644  get_vector(start_positions, Nstart_positions);
6645  //SG_PRINT("Nstart_positions:%i\n",Nstart_positions)
6646 
6647  //ARG2 end_positions
6648  int32_t Nend_positions;
6649  int32_t* end_positions;
6650  get_vector(end_positions, Nend_positions);
6651  //SG_PRINT("Nend_positions:%i\n",Nend_positions)
6652 
6653  //ARG3 quality
6654  int32_t Nquality;
6655  int32_t* quality;
6656  get_vector(quality, Nquality);
6657  //SG_PRINT("Nquality:%i\n",Nquality)
6658 
6659  //ARG4 all candidate positions
6660  int32_t Nall_pos;
6661  int32_t* all_pos;
6662  get_vector(all_pos, Nall_pos);
6663  //SG_PRINT("Nall_pos:%i\n",Nall_pos)
6664 
6665  ASSERT(Nquality==Nend_positions)
6666  ASSERT(Nend_positions==Nstart_positions)
6667 
6668  CIntronList* intron_list = new CIntronList();
6669 
6670  intron_list->init_list(all_pos, Nall_pos);
6671 
6672  intron_list->read_introns(start_positions, end_positions, quality, Nstart_positions);
6673 
6674  SG_FREE(start_positions);
6675  SG_FREE(end_positions);
6676  SG_FREE(quality);
6677  SG_FREE(all_pos);
6678 
6679  //int32_t test;
6680  //int32_t testq;
6681  //intron_list->get_coverage(&test, &testq, 15 ,16);
6682 
6683  //SG_PRINT("coverage: %i, quality: %i\n",test, testq)
6684 
6685  CDynProg* h = ui_structure->get_dyn_prog();
6686  if (!h)
6687  SG_ERROR("no DynProg object found, use set_model first\n")
6688 
6689  h->set_intron_list(intron_list, 2);
6690 
6691  return true;
6692 }
6693 bool CSGInterface::cmd_precompute_tiling_features()
6694 {
6695  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6696  CPlif** PEN = pm->get_PEN();
6697  CDynProg* h = ui_structure->get_dyn_prog();
6698 
6699  int32_t Nintensities=0;
6700  float64_t* intensities;
6701  get_vector(intensities, Nintensities);
6702 
6703  int32_t Nprobe_pos=0;
6704  int32_t* probe_pos;
6705  get_vector(probe_pos, Nprobe_pos);
6706  ASSERT(Nprobe_pos==Nintensities)
6707 
6708  int32_t Ntiling_plif_ids=0;
6709  int32_t* tiling_plif_ids;
6710  get_vector(tiling_plif_ids, Ntiling_plif_ids);
6711 
6712  h->init_tiling_data(probe_pos,intensities, Nprobe_pos);
6713  h->precompute_tiling_plifs(PEN, tiling_plif_ids, Ntiling_plif_ids);
6714  return true;
6715 }
6716 
// Runs the n-best path computation on the current CDynProg.
//
// Six arguments are consumed in order: p (start transitions), q (end
// transitions), a one- or two-element vector {nbest[, nother]}, the segment
// path matrix, the transition matrix a_trans and the loss matrix. Feature
// data, ORF info and the plif matrix are taken from ui_structure.
//
// Three results are set: the scores vector, and the state and position
// matrices converted to float64 and laid out with the path index varying
// fastest.
//
// NOTE(review): the embedded listing numbers skip 6793, 6795 and 6832, so
// statements may be missing from this copy (6832 sits in the branch that
// handles seg_path==NULL) - confirm against the upstream source.
6717 bool CSGInterface::cmd_best_path_trans()
6718 {
6719  CDynProg* h = ui_structure->get_dyn_prog();
6720 
6721  CSegmentLoss* seg_loss_obj = h->get_segment_loss_object();
6722 
6723  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6724 
6725  int32_t num_states = h->get_num_states();
6726  int32_t* feat_dims = ui_structure->get_feature_dims();
6727  float64_t* features = (ui_structure->get_feature_matrix(false));
6728  CSparseFeatures<float64_t>* features_sparse1 = (ui_structure->get_feature_matrix_sparse(0));
6729  CSparseFeatures<float64_t>* features_sparse2 = (ui_structure->get_feature_matrix_sparse(1));
6730  int32_t* orf_info = ui_structure->get_orf_info();
6731  bool use_orf = ui_structure->get_use_orf();
6732  int32_t Nplif = pm->get_num_plifs();
6733 
6734  // ARG 1
6735  // transitions from initial state (#states x 1)
6736  int32_t Np=0;
6737  float64_t* p;
6738  get_vector(p, Np);
6739  if (Np!=num_states)
6740  SG_ERROR("# transitions from initial state (%i) does not match # states (%i)\n", Np, num_states)
6741 
6742  // ARG 2
6743  // transitions to end state (#states x 1)
6744  int32_t Nq=0;
6745  float64_t* q;
6746  get_vector(q, Nq);
6747  if (Nq!=num_states)
6748  SG_ERROR("# transitions to end state (%i) does not match # states (%i)\n", Nq, num_states)
6749 
6750  // ARG 3
6751  // number of best paths
      // A two-element vector supplies {nbest, nother}; any other length
      // uses only element 0 and leaves nother at 0.
      // NOTE(review): element 0 is read without checking Nnbest>0.
6752  int32_t Nnbest=0;
6753  int32_t* all_nbest;
6754  get_vector(all_nbest, Nnbest);
6755  int32_t nbest;
6756  int32_t nother = 0;
6757  if (Nnbest==2)
6758  {
6759  nbest =all_nbest[0];
6760  nother=all_nbest[1];
6761  }
6762  else
6763  nbest =all_nbest[0];
6764  SG_FREE(all_nbest);
6765 
6766  // ARG 4
6767  // segment path (2 x #feature_positions)
6768  // masking/weighting of loss for specific
6769  // regions of the true path
6770  int32_t Nseg_path=0;
6771  int32_t Mseg_path=0;
6772  float64_t* seg_path;
6773  get_matrix(seg_path, Nseg_path, Mseg_path);
6774 
6775  // ARG 5
6776  // links for transitions (#transitions x 4)
6777  int32_t Na_trans=0;
6778  int32_t num_a_trans=0;
6779  float64_t* a_trans;
6780  get_matrix(a_trans, num_a_trans, Na_trans);
6781 
6782  // ARG 6
6783  // loss matrix (#segment x 2*#segments)
6784  // one (#segment x #segments)-matrix for segment loss
6785  // and one for nucleotide loss
6786  int32_t Nloss=0;
6787  int32_t Mloss=0;
6788  float64_t* loss;
6789  get_matrix(loss, Nloss,Mloss);
6790 
      // M sizes the result buffers allocated near the end of this function.
6791  int32_t M = h->get_num_positions();
6792 
6794  // check input
6796  ASSERT(num_states==Nq)
6797 
6798  CPlif** PEN=pm->get_PEN();
6799  ASSERT(PEN)
6800 
6801  h->set_p_vector(SGVector<float64_t>(p, num_states));
6802  h->set_q_vector(SGVector<float64_t>(q, num_states));
6803 
      // Without a segment path the transition matrix is declared with 3
      // columns instead of the Na_trans columns that were read.
6804  if (seg_path!=NULL)
6805  {
6806  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, Na_trans)) ;
6807  }
6808  else
6809  {
6810  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, 3)) ; // segment_id = 0
6811  }
6812 
6813  if (!h->check_svm_arrays())
6814  {
6815  SG_ERROR("svm arrays inconsistent\n")
      // NOTE(review): the two statements below only run if SG_ERROR
      // returns to the caller - confirm whether it does.
6816  CPlif::delete_penalty_struct(PEN, Nplif) ;
6817  return false ;
6818  }
6819 
6820  SG_DEBUG("best_path_trans: M: %i, Mseg_path: %i\n", M, Mseg_path)
6821 
      // The trailing `false` is also used for `loss` below, which this
      // function frees itself.
      // NOTE(review): presumably that flag disables reference counting so
      // the wrapper does not take ownership - confirm.
6822  h->set_observation_matrix(SGNDArray<float64_t>(features, feat_dims, 3, false));
6823 
6824  if (seg_path!=NULL)
6825  {
6826  h->best_path_set_segment_loss(SGMatrix<float64_t>(loss, Nloss, Mloss, false)) ;
6827  seg_loss_obj->set_segment_loss(loss, Nloss, Mloss);
6828  }
6829  else
6830  {
6831  float64_t zero2[2] = {0.0, 0.0} ;
6833  seg_loss_obj->set_segment_loss(zero2, 2, 1);
6834  }
6835  h->set_content_type_array(SGMatrix<float64_t>(seg_path,Nseg_path,Mseg_path));
6836 
      // Any loss entry above 1e-3 selects the segment-loss variant below.
      // The scan runs regardless of whether seg_path was given.
6837  bool segment_loss_non_zero=false;
6838  for (int32_t i=0; i<Nloss*Mloss; i++)
6839  {
6840  if (loss[i]>1e-3)
6841  segment_loss_non_zero=true;
6842  }
6843 
6844  SG_FREE(loss);
6845  loss=NULL;
6846 
6847  h->set_orf_info(SGMatrix<int32_t>(orf_info, num_states, 2));
6848  h->set_sparse_features(features_sparse1, features_sparse2);
6849  h->set_plif_matrices(pm);
6850 
      // Only nbest==1 is passed through as 1; every other value is passed
      // to compute_nbest_paths as 2.
6851  if (segment_loss_non_zero)
6852  {
6853  SG_DEBUG("Using version with segment_loss\n")
6854  if (nbest==1)
6855  h->compute_nbest_paths(feat_dims[2], use_orf, 1,true,false);
6856  else
6857  h->compute_nbest_paths(feat_dims[2], use_orf, 2,true,false);
6858  }
6859  else
6860  {
6861  SG_DEBUG("Using version without segment_loss\n")
6862  if (nbest==1)
6863  h->compute_nbest_paths(feat_dims[2], use_orf, 1,false,false);
6864  else
6865  h->compute_nbest_paths(feat_dims[2], use_orf, 2,false,false);
6866  }
6867 
6868  SGVector<float64_t> p_prob=h->get_scores();
6869 
6870  SGMatrix<int32_t> states=h->get_states();
6871 
6872  SGMatrix<int32_t> my_pos=h->get_positions();
6873 
6874  // transcribe result
6875  float64_t* d_my_path= SG_MALLOC(float64_t, (nbest+nother)*M);
6876  float64_t* d_my_pos= SG_MALLOC(float64_t, (nbest+nother)*M);
6877 
      // Source index is i+k*M (path k, position i); the destination index
      // i*(nbest+nother)+k makes the path index vary fastest.
6878  for (int32_t k=0; k<(nbest+nother); k++)
6879  {
6880  for (int32_t i=0; i<M; i++)
6881  {
6882  d_my_path[i*(nbest+nother)+k] = states.matrix[i+k*M] ;
6883  d_my_pos[i*(nbest+nother)+k] = my_pos.matrix[i+k*M] ;
6884  }
6885  }
6886  set_vector(p_prob.vector,nbest+nother);
6887  set_vector(d_my_path, (nbest+nother)*M);
6888  set_vector(d_my_pos, (nbest+nother)*M);
6889 
6890  SG_FREE(d_my_path);
6891  SG_FREE(d_my_pos);
6892 
6893  return true;
6894 
6895 }
6896 
// Computes derivatives along a given path on the current CDynProg.
//
// Seven arguments are consumed in order: p, q, the segment path matrix,
// the transition matrix a_trans, the loss matrix, the state sequence of the
// path and the positions of the path.
//
// Six results are set in order: derivative w.r.t. p, derivative w.r.t. q,
// derivative w.r.t. the transition matrix (num_states x num_states), the
// cumulative plif derivatives ((max_plif_id+1) x max_plif_len), the path
// scores and the path losses.
6897 bool CSGInterface::cmd_best_path_trans_deriv()
6898 {
6899  int32_t num_states = ui_structure->get_num_states();
6900  int32_t* feat_dims = ui_structure->get_feature_dims();
6901  float64_t* features = (ui_structure->get_feature_matrix(false));
6902 
6903  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6904  int32_t Nplif = pm->get_num_plifs();
6905  CPlif** PEN = pm->get_PEN();
6906 
6907  // ARG 1
6908  // transitions from initial state (#states x 1)
6909  int32_t Np=0;
6910  float64_t* p=NULL;
6911  get_vector(p, Np);
6912  if (Np!=num_states)
6913  SG_ERROR("Np!=num_states; Np:%i num_states:%i",Np,num_states)
6914 
6915  // ARG 2
6916  // transitions to end state (#states x 1)
6917  int32_t Nq=0;
6918  float64_t* q=NULL;
6919  get_vector(q, Nq);
6920  if (Nq!=num_states)
6921  SG_ERROR("Nq!=num_states; Nq:%i num_states:%i",Nq,num_states)
6922 
6923 
6924  // ARG 3
6925  // segment path (2 x #feature_positions)
6926  // masking/weighting of loss for specific
6927  // regions of the true path
6928  int32_t Nseg_path=0;
6929  int32_t Mseg_path=0;
6930  float64_t* seg_path;
6931  get_matrix(seg_path,Nseg_path,Mseg_path);
6932 
6933  // ARG 4
6934  // links for transitions (#transitions x 4)
6935  int32_t Na_trans=0;
6936  int32_t num_a_trans=0;
6937  float64_t* a_trans=NULL;
6938  get_matrix(a_trans, num_a_trans, Na_trans);
6939 
6940  // ARG 5
6941  // loss matrix (#segment x 2*#segments)
6942  // one (#segment x #segments)-matrix for segment loss
6943  // and one for nucleotide loss
6944  int32_t Nloss=0;
6945  int32_t Mloss=0;
6946  float64_t* loss=NULL;
6947  get_matrix(loss, Nloss,Mloss);
6948 
6949  // ARG 6
6950  // path to calc derivative for
6951  int32_t Nmystate_seq=0;
6952  int32_t* mystate_seq=NULL;
6953  get_vector(mystate_seq, Nmystate_seq);
6954 
6955  // ARG 7
6956  // positions of the path
6957  int32_t Nmypos_seq=0;
6958  int32_t* mypos_seq=NULL;
6959  get_vector(mypos_seq, Nmypos_seq);
6960 
6961 
6962  //a => a_trans
6963 
      // Every plif except index 0 must carry an id equal to its index.
      // max_plif_id therefore ends as the largest index (Nplif-1 when
      // Nplif>0); max_plif_len is the longest plif, at least 1.
6964  int32_t max_plif_id = 0 ;
6965  int32_t max_plif_len = 1 ;
6966  for (int32_t i=0; i<Nplif; i++)
6967  {
6968  if (i>0 && PEN[i]->get_id()!=i)
6969  SG_ERROR("PEN[i]->get_id()!=i; PEN[%i]->get_id():%i ,\n",i, PEN[i]->get_id())
6970  if (i>max_plif_id)
6971  max_plif_id=i ;
6972  if (PEN[i]->get_plif_len()>max_plif_len)
6973  max_plif_len=PEN[i]->get_plif_len() ;
6974  } ;
6975 
6976 
      // NOTE(review): h is dereferenced without a NULL check here, unlike
      // several sibling commands.
6977  CDynProg* h = ui_structure->get_dyn_prog();
6978  CSegmentLoss* seg_loss_obj = h->get_segment_loss_object();
6979  h->set_num_states(num_states) ;
6980  h->set_p_vector(SGVector<float64_t>(p, num_states)) ;
6981  h->set_q_vector(SGVector<float64_t>(q, num_states)) ;
6982 
      // Without a segment path the transition matrix is declared with 3
      // columns instead of the Na_trans columns that were read.
6983  if (seg_path!=NULL)
6984  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, Na_trans)) ;
6985  else
6986  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, 3)) ;
6987 
6988  if (!h->check_svm_arrays())
6989  SG_ERROR("svm arrays inconsistent\n")
6990 
      // Both buffers hold Nmypos_seq+1 entries; the memset calls and the
      // copy loop below touch only the first Nmypos_seq of them.
6991  int32_t *my_path = SG_MALLOC(int32_t, Nmypos_seq+1);
6992  memset(my_path, -1, Nmypos_seq*sizeof(int32_t)) ;
6993  int32_t *my_pos = SG_MALLOC(int32_t, Nmypos_seq+1);
6994  memset(my_pos, -1, Nmypos_seq*sizeof(int32_t)) ;
6995 
6996  h->set_observation_matrix(SGNDArray<float64_t>(features, feat_dims, 3));
      // NOTE(review): the loop bound is Nmypos_seq for both inputs;
      // Nmystate_seq is never compared against it.
6997  for (int32_t i=0; i<Nmypos_seq; i++)
6998  {
6999  my_path[i] = mystate_seq[i] ;
7000  my_pos[i] = mypos_seq[i] ;
7001  }
7002 
7003  if (seg_path!=NULL)
7004  {
7005  h->best_path_set_segment_loss(SGMatrix<float64_t>(loss, Nloss, Mloss)) ;
7006  seg_loss_obj->set_segment_loss(loss, Nloss, Mloss);
7007  }
7008  else
7009  {
      // No segment path: a 2x1 all-zero loss matrix is installed instead.
7010  float64_t zero2[2] = {0.0, 0.0} ;
7011  h->best_path_set_segment_loss(SGMatrix<float64_t>(zero2, 2, 1, false)) ;
7012  seg_loss_obj->set_segment_loss(zero2, 2, 1);
7013  }
7014  h->set_content_type_array(SGMatrix<float64_t>(seg_path,Nseg_path,Mseg_path));
7015 
      // a_Plif_deriv is a 2-d view onto p_Plif_deriv; element(id, j) below
      // writes through to the buffer that is later passed to set_matrix.
7016  float64_t* p_Plif_deriv = SG_MALLOC(float64_t, (max_plif_id+1)*max_plif_len);
7017  CDynamicArray <float64_t> a_Plif_deriv(p_Plif_deriv, max_plif_id+1, max_plif_len, false, false) ; // 2d
7018 
7019  float64_t* p_A_deriv = SG_MALLOC(float64_t, num_states*num_states);
7020  float64_t* p_p_deriv = SG_MALLOC(float64_t, num_states);
7021  float64_t* p_q_deriv = SG_MALLOC(float64_t, num_states);
7022 
7023  h->set_plif_matrices(pm);
7024  h->best_path_trans_deriv(my_path, my_pos, Nmypos_seq, features, feat_dims[2]);
7025 
7026  float64_t* p_my_scores;
7027  int32_t n_scores;
7028  h->get_path_scores(&p_my_scores, &n_scores);
7029 
7030  float64_t* p_my_losses;
7031  int32_t n_losses;
7032  h->get_path_losses(&p_my_losses, &n_losses);
7033 
      // The transition derivatives are stored with index i + j*num_states.
7034  for (int32_t i=0; i<num_states; i++)
7035  {
7036  for (int32_t j=0; j<num_states; j++)
7037  p_A_deriv[i+j*num_states] = h->get_a_deriv(i, j) ;
7038 
7039  p_p_deriv[i]=h->get_p_deriv(i) ;
7040  p_q_deriv[i]=h->get_q_deriv(i) ;
7041  }
7042 
      // NOTE(review): the inner loop reads max_plif_len entries from deriv
      // although the ASSERT only guarantees len<=max_plif_len - confirm
      // that get_cum_derivative always exposes that many entries.
7043  for (int32_t id=0; id<=max_plif_id; id++)
7044  {
7045  int32_t len=0 ;
7046  const float64_t * deriv = PEN[id]->get_cum_derivative(len) ;
7047  ASSERT(len<=max_plif_len)
7048  for (int32_t j=0; j<max_plif_len; j++)
7049  a_Plif_deriv.element(id, j)= deriv[j] ;
7050  }
7051 
      // Scores and losses are returned with length Nmypos_seq; the counts
      // n_scores / n_losses reported by the getters are not used.
7052  set_vector(p_p_deriv, num_states);
7053  set_vector(p_q_deriv, num_states);
7054  set_matrix(p_A_deriv, num_states, num_states);
7055  set_matrix(p_Plif_deriv, (max_plif_id+1), max_plif_len);
7056  set_vector(p_my_scores, Nmypos_seq);
7057  set_vector(p_my_losses, Nmypos_seq);
7058 
7059  SG_FREE(p_A_deriv);
7060  SG_FREE(p_p_deriv);
7061  SG_FREE(p_q_deriv);
7062  SG_FREE(p_Plif_deriv);
      // These two buffers are released with plain free(), not SG_FREE.
      // NOTE(review): presumably the getters allocate with malloc - confirm.
7063  free(p_my_scores);
7064  free(p_my_losses);
7065 
7066  SG_FREE(my_path);
7067  SG_FREE(my_pos);
7068 
      // NOTE(review): p, q, a_trans and loss were also wrapped in
      // SGVector/SGMatrix objects above; verify that freeing them here
      // cannot release the same buffer twice.
7069  SG_FREE(p);
7070  SG_FREE(q);
7071  SG_FREE(a_trans);
7072  SG_FREE(loss);
7073  SG_FREE(mystate_seq);
7074  SG_FREE(mypos_seq);
7075 
7076  return true ;
7077 }
7078 
7079 bool CSGInterface::cmd_precompute_subkernels()
7080 {
7081  if (m_nrhs!=1 || !create_return_values(0))
7082  return false;
7083 
7084  return ui_kernel->precompute_subkernels();
7085 }
7086 bool CSGInterface::cmd_crc()
7087 {
7088  if (m_nrhs!=2 || !create_return_values(1))
7089  return false;
7090 
7091  int32_t slen=0;
7092  char* string=get_string(slen);
7093  ASSERT(string)
7094  uint8_t* bstring=SG_MALLOC(uint8_t, slen);
7095 
7096  for (int32_t i=0; i<slen; i++)
7097  bstring[i]=string[i];
7098  SG_FREE(string);
7099 
7100  int32_t val=CHash::crc32(bstring, slen);
7101  SG_FREE(bstring);
7102  set_int(val);
7103 
7104  return true;
7105 }
7106 
7107 bool CSGInterface::cmd_system()
7108 {
7109  if (m_nrhs<2 || !create_return_values(0))
7110  return false;
7111 
7112  int32_t len=0;
7113  char* command=SG_MALLOC(char, 10000);
7114  memset(command, 0, sizeof(char)*10000);
7115  char* cmd=get_str_from_str_or_direct(len);
7116  strncat(command, cmd, 10000);
7117  SG_FREE(cmd);
7118 
7119  while (m_rhs_counter<m_nrhs)
7120  {
7121  strncat(command, " ", 10000);
7122  char* arg=get_str_from_str_or_direct(len);
7123  strncat(command, arg, 10000);
7124  SG_FREE(arg);
7125  }
7126 
7127  int32_t success=system(command);
7128 
7129  return (success==0);
7130 }
7131 
7132 bool CSGInterface::cmd_exit()
7133 {
7134  exit(0);
7135  return 0; //never reached but necessary to keep sun compiler happy
7136 }
7137 
7138 bool CSGInterface::cmd_exec()
7139 {
7140  if (m_nrhs<2 || !create_return_values(0))
7141  return false;
7142 
7143  int32_t len=0;
7144  char* filename=get_str_from_str_or_direct(len);
7145  FILE* file=fopen(filename, "r");
7146  if (!file)
7147  {
7148  SG_FREE(filename);
7149  SG_ERROR("Error opening file: %s.\n", filename)
7150  }
7151 
7152  while (!feof(file))
7153  {
7154  // FIXME: interpret lines as input
7155  break;
7156  }
7157 
7158  fclose(file);
7159  return true;
7160 }
7161 
7162 bool CSGInterface::cmd_set_output()
7163 {
7164  if (m_nrhs<2 || !create_return_values(0))
7165  return false;
7166 
7167  int32_t len=0;
7168  char* filename=get_str_from_str_or_direct(len);
7169 
7170  if (file_out)
7171  fclose(file_out);
7172  file_out=NULL;
7173 
7174  SG_INFO("Setting output file to: %s.\n", filename)
7175 
7176  if (strmatch(filename, "STDERR"))
7177  io->set_target(stderr);
7178  else if (strmatch(filename, "STDOUT"))
7179  io->set_target(stdout);
7180  else
7181  {
7182  file_out=fopen(filename, "w");
7183  if (!file_out)
7184  SG_ERROR("Error opening output file %s.\n", filename)
7185  io->set_target(file_out);
7186  }
7187 
7188  return true;
7189 }
7190 
7191 bool CSGInterface::cmd_set_threshold()
7192 {
7193  if (m_nrhs!=2 || !create_return_values(0))
7194  return false;
7195 
7196  float64_t value=get_real_from_real_or_str();
7197 
7198  ui_math->set_threshold(value);
7199  return true;
7200 }
7201 
7202 bool CSGInterface::cmd_init_random()
7203 {
7204  if (m_nrhs!=2 || !create_return_values(0))
7205  return false;
7206 
7207  uint32_t initseed=(uint32_t) get_int_from_int_or_str();
7208  ui_math->init_random(initseed);
7209 
7210  return true;
7211 }
7212 
7213 bool CSGInterface::cmd_set_num_threads()
7214 {
7215  if (m_nrhs!=2 || !create_return_values(0))
7216  return false;
7217 
7218  int32_t num_threads=get_int_from_int_or_str();
7219 
7220  parallel->set_num_threads(num_threads);
7221  SG_INFO("Set number of threads to %d.\n", num_threads)
7222 
7223  return true;
7224 }
7225 
7226 bool CSGInterface::cmd_translate_string()
7227 {
7228  if (m_nrhs!=4 || !create_return_values(1))
7229  return false;
7230 
7231  float64_t* string=NULL;
7232  int32_t len;
7233  get_vector(string, len);
7234 
7235  int32_t order=get_int();
7236  int32_t start=get_int();
7237 
7238  const int32_t max_val=2; /* DNA->2bits */
7239  int32_t i,j;
7240  uint16_t* obs=SG_MALLOC(uint16_t, len);
7241 
7242  for (i=0; i<len; i++)
7243  {
7244  switch ((char) string[i])
7245  {
7246  case 'A': obs[i]=0; break;
7247  case 'C': obs[i]=1; break;
7248  case 'G': obs[i]=2; break;
7249  case 'T': obs[i]=3; break;
7250  case 'a': obs[i]=0; break;
7251  case 'c': obs[i]=1; break;
7252  case 'g': obs[i]=2; break;
7253  case 't': obs[i]=3; break;
7254  default: SG_ERROR("Wrong letter in string.\n")
7255  }
7256  }
7257 
7258  //convert interval of size T
7259  for (i=len-1; i>=order-1; i--)
7260  {
7261  uint16_t value=0;
7262  for (j=i; j>=i-order+1; j--)
7263  value=(value>>max_val) | ((obs[j])<<(max_val*(order-1)));
7264 
7265  obs[i]=(uint16_t) value;
7266  }
7267 
7268  for (i=order-2;i>=0;i--)
7269  {
7270  uint16_t value=0;
7271  for (j=i; j>=i-order+1; j--)
7272  {
7273  value= (value >> max_val);
7274  if (j>=0)
7275  value|=(obs[j]) << (max_val * (order-1));
7276  }
7277  obs[i]=value;
7278  }
7279 
7280  float64_t* real_obs=SG_MALLOC(float64_t, len);
7281  for (i=start; i<len; i++)
7282  real_obs[i-start]=(float64_t) obs[i];
7283  SG_FREE(obs);
7284 
7285  set_vector(real_obs, len);
7286  SG_FREE(real_obs);
7287 
7288  return true;
7289 }
7290 
7291 bool CSGInterface::cmd_clear()
7292 {
7293  // reset guilib
7294  SG_UNREF(ui_classifier);
7295  ui_classifier=new CGUIClassifier(this);
7296  SG_UNREF(ui_distance);
7297  ui_distance=new CGUIDistance(this);
7298  SG_UNREF(ui_features);
7299  ui_features=new CGUIFeatures(this);
7300  SG_UNREF(ui_hmm);
7301  ui_hmm=new CGUIHMM(this);
7302  SG_UNREF(ui_kernel);
7303  ui_kernel=new CGUIKernel(this);
7304  SG_UNREF(ui_labels);
7305  ui_labels=new CGUILabels(this);
7306  SG_UNREF(ui_math);
7307  ui_math=new CGUIMath(this);
7308  SG_UNREF(ui_pluginestimate);
7309  ui_pluginestimate=new CGUIPluginEstimate(this);
7310  SG_UNREF(ui_preproc);
7311  ui_preproc=new CGUIPreprocessor(this);
7312  SG_UNREF(ui_time);
7313  ui_time=new CGUITime(this);
7314 
7315  return true;
7316 }
7317 
7318 bool CSGInterface::cmd_tic()
7319 {
7320  ui_time->start();
7321  return true;
7322 }
7323 
7324 bool CSGInterface::cmd_toc()
7325 {
7326  ui_time->stop();
7327  return true;
7328 }
7329 
7330 bool CSGInterface::cmd_print()
7331 {
7332  if (m_nrhs<2 || !create_return_values(0))
7333  return false;
7334 
7335  int32_t len=0;
7336  char* msg=get_str_from_str_or_direct(len);
7337 
7338  SG_PRINT("%s\n", msg)
7339 
7340  SG_FREE(msg);
7341  return true;
7342 }
7343 
7344 bool CSGInterface::cmd_echo()
7345 {
7346  if (m_nrhs<2 || !create_return_values(0))
7347  return false;
7348 
7349  int32_t len=0;
7350  char* level=get_str_from_str_or_direct(len);
7351 
7352  if (strmatch(level, "OFF"))
7353  {
7354  echo=false;
7355  SG_INFO("Echo is off.\n")
7356  }
7357  else
7358  {
7359  echo=true;
7360  SG_INFO("Echo is on.\n")
7361  }
7362 
7363  SG_FREE(level);
7364  return true;
7365 }
7366 
7367 bool CSGInterface::cmd_loglevel()
7368 {
7369  if (m_nrhs<2 || !create_return_values(0))
7370  return false;
7371 
7372  int32_t len=0;
7373  char* level=get_str_from_str_or_direct(len);
7374 
7375  if (strmatch(level, "ALL") || strmatch(level, "GCDEBUG"))
7376  io->set_loglevel(MSG_GCDEBUG);
7377  else if (strmatch(level, "DEBUG"))
7378  io->set_loglevel(MSG_DEBUG);
7379  else if (strmatch(level, "INFO"))
7380  io->set_loglevel(MSG_INFO);
7381  else if (strmatch(level, "NOTICE"))
7382  io->set_loglevel(MSG_NOTICE);
7383  else if (strmatch(level, "WARN"))
7384  io->set_loglevel(MSG_WARN);
7385  else if (strmatch(level, "ERROR"))
7386  io->set_loglevel(MSG_ERROR);
7387  else if (strmatch(level, "CRITICAL"))
7388  io->set_loglevel(MSG_CRITICAL);
7389  else if (strmatch(level, "ALERT"))
7390  io->set_loglevel(MSG_ALERT);
7391  else if (strmatch(level, "EMERGENCY"))
7392  io->set_loglevel(MSG_EMERGENCY);
7393  else
7394  SG_ERROR("Unknown loglevel '%s'.\n", level)
7395 
7396  SG_INFO("Loglevel set to %s.\n", level)
7397 
7398  SG_FREE(level);
7399  return true;
7400 }
7401 
// Switches syntax highlighting of io on or off.
// Requires at least two right-hand-side arguments and no return values.
// The argument must match "ON" or "OFF"; anything else is reported via
// SG_ERROR. Returns false if the argument check fails, true otherwise.
// NOTE(review): the embedded listing numbers skip 7412 and 7417 (one line
// in each branch), so statements may be missing from this copy - confirm
// against the upstream source.
7402 bool CSGInterface::cmd_syntax_highlight()
7403 {
7404  if (m_nrhs<2 || !create_return_values(0))
7405  return false;
7406 
7407  int32_t len=0;
7408  char* hili=get_str_from_str_or_direct(len);
7409 
7410  if (strmatch(hili, "ON"))
7411  {
7413  io->enable_syntax_highlighting();
7414  }
7415  else if (strmatch(hili, "OFF"))
7416  {
7418  io->disable_syntax_highlighting();
7419  }
7420  else
7421  SG_ERROR("arguments to " N_SYNTAX_HIGHLIGHT " are ON|OFF - found '%s'.\n", hili)
7422 
      // The argument text is echoed back before its buffer is released.
7423  SG_INFO("Syntax hilighting set to %s.\n", hili)
7424 
7425  SG_FREE(hili);
7426  return true;
7427 }
7428 
7429 bool CSGInterface::cmd_progress()
7430 {
7431  if (m_nrhs<2 || !create_return_values(0))
7432  return false;
7433 
7434  int32_t len=0;
7435  char* progress=get_str_from_str_or_direct(len);
7436 
7437  if (strmatch(progress, "ON"))
7438  io->enable_progress();
7439  else if (strmatch(progress, "OFF"))
7440  io->disable_progress();
7441  else
7442  SG_ERROR("arguments to progress are ON|OFF - found '%s'.\n", progress)
7443 
7444  SG_INFO("Progress set to %s.\n", progress)
7445 
7446  SG_FREE(progress);
7447  return true;
7448 }
7449 
7450 bool CSGInterface::cmd_get_version()
7451 {
7452  if (m_nrhs!=1 || !create_return_values(1))
7453  return false;
7454 
7455  set_int(version->get_version_revision());
7456 
7457  return true;
7458 }
7459 
// Command handler: prints help text through SG_PRINT, driven by the
// sg_methods table (terminated by an entry whose command is NULL).
//
//  - m_nrhs==1: prints every table entry that has neither a method nor a
//    usage_prefix (treated here as a group heading), then general hints.
//  - m_nrhs==2: reads one string argument and compares it via strmatch
//    against "doxygen"/"DOXYGEN" and "all"/"ALL" (both walk the whole
//    table); any other value is looked up as a command or group name.
//
// Returns false when m_nrhs is neither 1 nor 2 or create_return_values(0)
// fails; otherwise returns true, also when no help entry was found.
//
// NOTE(review): several SG_PRINT calls below contain more %s conversions
// than visible arguments and are not closed, and the embedded listing
// numbers skip values (e.g. 7481, 7483, 7496-7501). Argument lines appear
// to be missing from this listing - restore them from the upstream file
// before compiling.
7460 bool CSGInterface::cmd_help()
7461 {
7462  if ((m_nrhs!=1 && m_nrhs!=2) || !create_return_values(0))
7463  return false;
7464 
// i indexes sg_methods and is shared by every loop in this function
7465  int32_t i=0;
7466 
7467  SG_PRINT("\n")
7468  if (m_nrhs==1) // unspecified help
7469  {
7470  SG_PRINT("Help is available for the following topics.\n"
7471  "-------------------------------------------\n\n");
7472  while (sg_methods[i].command)
7473  {
// an entry without method and without usage_prefix is a group heading
7474  bool is_group_item=false;
7475  if (!sg_methods[i].method && !sg_methods[i].usage_prefix)
7476  is_group_item=true;
7477 
7478  if (is_group_item)
7479  {
7480  SG_PRINT("%s%s%s\n",
7482  sg_methods[i].command,
7484  }
7485 
7486  i++;
7487  }
7488  SG_PRINT("\nUse sg('%shelp%s', '%s<topic>%s')"
7489  " to see the list of commands in this group, e.g.\n\n"
7490  "\tsg('%shelp%s', '%sFeatures%s')\n\n"
7491  "to see the list of commands for the 'Features' group.\n"
7492  "\nOr use sg('%shelp%s', '%sall%s')"
7493  " to see a brief listing of all commands.\n\nTo disable syntax"
7494  " highlighting (useful e.g. in the matlab GUI) use\n\n"
7495  "\tsg('syntax_highlight','OFF')\n",
7502  }
7503  else // m_nrhs == 2 -> all commands, single command or group help
7504  {
7505  bool found=false;
7506  bool in_group=false;
7507  int32_t clen=0;
7508  char* command=get_string(clen);
7509 
7510  if (strmatch("doxygen", command) || strmatch("DOXYGEN", command))
7511  {
7512  found=true;
7513  while (sg_methods[i].command)
7514  {
7515  if (sg_methods[i].usage_prefix) // display group item
7516  {
7517  SG_PRINT("\\arg \\b %s \\verbatim %s%s%s \\endverbatim\n",
7518  sg_methods[i].command,
7519  sg_methods[i].usage_prefix,
7520  sg_methods[i].command,
7521  sg_methods[i].usage_suffix);
7522  }
7523  else if (!sg_methods[i].method) // display group
7524  {
7525  SG_PRINT("\n\\section %s_sec %s\n",
7526  sg_methods[i].command, sg_methods[i].command);
7527  }
7528  i++;
7529  }
7530  }
// NOTE(review): this is a plain `if`, not `else if`. After the doxygen walk
// above, i already indexes the terminating entry, so (assuming the "all"
// comparison is false for a doxygen request) the loop in the else branch
// below does not execute and `found` stays true.
7531  if (strmatch("all", command) || strmatch("ALL", command))
7532  {
7533  found=true;
7534  while (sg_methods[i].command)
7535  {
7536  if (sg_methods[i].usage_prefix) // display group item
7537  {
7538  SG_PRINT("\t%s%s%s%s%s\n", sg_methods[i].usage_prefix,
7540  sg_methods[i].command,
7542  sg_methods[i].usage_suffix);
7543  }
7544  else if (!sg_methods[i].method) // display group
7545  {
7546  SG_PRINT("\nCommands in group %s%s%s\n",
7548  sg_methods[i].command,
7550  }
7551  i++;
7552  }
7553  }
7554  else
7555  {
// single command or group lookup: until a group name matches, each entry is
// compared with the requested name; once inside a group, the following
// entries are listed until one without usage_prefix is reached
7556  while (sg_methods[i].command)
7557  {
7558  if (in_group)
7559  {
7560  if (sg_methods[i].usage_prefix) // display group item
7561  SG_PRINT("\t%s%s%s\n",
7563  sg_methods[i].command,
7565  else // next group reached -> end
7566  break;
7567  }
7568  else
7569  {
7570  found=strmatch(sg_methods[i].command, command);
7571  if (found)
7572  {
7573  if (sg_methods[i].usage_prefix) // found item
7574  {
7575  SG_PRINT("Usage for %s%s%s\n\n\t%s%s%s%s%s\n",
7577  sg_methods[i].command,
7579  sg_methods[i].usage_prefix,
7581  sg_methods[i].command,
7583  sg_methods[i].usage_suffix)
7584  break;
7585  }
7586  else // found group item
7587  {
7588  SG_PRINT("Commands in group %s%s%s\n\n",
7590  sg_methods[i].command,
7592  in_group=true;
7593  }
7594  }
7595  }
7596 
7597  i++;
7598  }
7599  }
7600 
7601  if (!found)
7602  SG_PRINT("Could not find help for command %s.\n", command)
7603  else if (in_group)
7604  {
7605  SG_PRINT("\n\nUse sg('%shelp%s', '%s<command>%s')"
7606  " to see the usage pattern of a single command, e.g.\n\n"
7607  "\tsg('%shelp%s', '%sclassify%s')\n\n"
7608  " to see the usage pattern of the command 'classify'.\n",
7613  }
7614 
// release the argument string obtained from get_string()
7615  SG_FREE(command);
7616  }
7617 
7618 
7619  SG_PRINT("\n")
7620 
7621  return true;
7622 }
7623 #ifdef TRACE_MEMORY_ALLOCS
7624  extern CMap<void*, MemoryBlock>* sg_mallocs;
7625 #endif
7626 
7627 bool CSGInterface::cmd_whos()
7628 {
7629  if ((m_nrhs!=1) || !create_return_values(0))
7630  return false;
7631 
7632 #ifdef TRACE_MEMORY_ALLOCS
7633  SG_PRINT("Blocks allocated by shogun\n")
7634  list_memory_allocs();
7635  SG_PRINT("\n")
7636  return true;
7637 #else
7638  SG_PRINT("Requires shogun to be compiled with --enable-trace-mallocs\n")
7639  return false;
7640 #endif
7641 }
7642 
// Command handler for legacy command strings.
// Reads one string argument, points m_legacy_strptr at it so that the
// *_from_str helpers parse subsequent arguments out of that string, takes
// the first whitespace-delimited token as the command name and calls the
// sg_methods entry whose command matches it (compared with strmatch).
// Returns true if a matching handler returned true; otherwise SG_ERROR is
// invoked and, if that returns, false is returned.
//
// NOTE(review): the embedded listing numbers skip 7645 and 7667/7669/7671/
// 7673, and the SG_ERROR call has more %s conversions than visible
// arguments - lines appear to be missing from this listing.
// NOTE(review): get_str_from_str() only resets m_legacy_strptr to NULL once
// the string is fully consumed; if a handler leaves arguments unread,
// m_legacy_strptr still points into `arg` after SG_FREE(arg) - confirm
// whether callers reset it.
7643 bool CSGInterface::cmd_send_command()
7644 {
7646 
7647  int32_t len=0;
7648  char* arg=get_string(len);
7649  //SG_DEBUG("legacy: arg == %s\n", arg)
7650  m_legacy_strptr=arg;
7651 
// first token of the legacy string is the command name; this also advances
// m_legacy_strptr past it
7652  char* command=get_str_from_str(len);
7653  int32_t i=0;
7654  bool success=false;
7655 
7656  while (sg_methods[i].command)
7657  {
7658  if (strmatch(command, sg_methods[i].command))
7659  {
7660  SG_DEBUG("legacy: found command %s\n", sg_methods[i].command)
7661  // fix-up m_nrhs; +1 to include command
7662  m_nrhs=get_num_args_in_str()+1;
7663 
7664  if (!(interface->*(sg_methods[i].method))())
7665  {
7666  SG_ERROR("Usage: %s%s%s\n\n\t%s%s%s%s%s\n",
7668  sg_methods[i].command,
7670  sg_methods[i].usage_prefix,
7672  sg_methods[i].command,
7674  sg_methods[i].usage_suffix);
7675  }
7676  else
7677  {
7678  success=true;
7679  break;
7680  }
7681  }
7682 
7683  i++;
7684  }
7685 
7686  if (!success)
7687  SG_ERROR("Non-supported legacy command %s.\n", command)
7688 
7689  SG_FREE(command);
7690  SG_FREE(arg);
7691  return success;
7692 }
7693 
7694 bool CSGInterface::cmd_run_python()
7695 {
7696  SG_ERROR("Only available in the elwms interface\n")
7697  return false;
7698 }
7699 
7700 bool CSGInterface::cmd_run_octave()
7701 {
7702  SG_ERROR("Only available in the elwms interface\n")
7703  return false;
7704 }
7705 
7706 bool CSGInterface::cmd_run_r()
7707 {
7708  SG_ERROR("Only available in the elwms interface\n")
7709  return false;
7710 }
7711 
7712 bool CSGInterface::cmd_pr_loqo()
7713 {
7714  if (m_nrhs!=7 || !create_return_values(2))
7715  return false;
7716 
7717  float64_t* c=NULL;
7718  int32_t lenc=0;
7719  get_vector(c, lenc);
7720 
7721  int32_t n = lenc;
7722 
7723  float64_t* H=NULL;
7724  int32_t nH=0;
7725  int32_t mH=0;
7726  get_matrix(H, nH, mH);
7727  ASSERT(nH==n && mH==n)
7728 
7729  float64_t* A=NULL;
7730  int32_t nA=0;
7731  int32_t mA=0;
7732  get_matrix(A, nA, mA);
7733  ASSERT(mA==n)
7734  int32_t m=nA;
7735 
7736  float64_t* b=NULL;
7737  int32_t lenb=0;
7738  get_vector(b, lenb);
7739  ASSERT(lenb==m)
7740 
7741  float64_t* l=NULL;
7742  int32_t lenl=0;
7743  get_vector(l, lenl);
7744  ASSERT(lenl==n)
7745 
7746  float64_t* u=NULL;
7747  int32_t lenu=0;
7748  get_vector(u, lenu);
7749  ASSERT(lenu==n)
7750 
7751  float64_t* x=SG_MALLOC(float64_t, 3*n);
7752  SGVector<float64_t>::fill_vector(x, 3*n, 0.0);
7753 
7754  float64_t* y=SG_MALLOC(float64_t, m+2*n);
7755  SGVector<float64_t>::fill_vector(y, m+2*n, 0.0);
7756 
7757  pr_loqo(n,m, c, H, A, b, l, u, x, y, 0, 5, 50, 0.05, 100, 0);
7758 
7759  set_vector(x, n);
7760  set_vector(y, m);
7761 
7762  SG_FREE(c);
7763  SG_FREE(H);
7764  SG_FREE(A);
7765  SG_FREE(b);
7766  SG_FREE(l);
7767  SG_FREE(u);
7768  SG_FREE(x);
7769  SG_FREE(y);
7770  return true;
7771 }
7772 
// Prints the interactive prompt ("shogun >> " wrapped in two %s fields)
// through SG_PRINT.
// NOTE(review): the format string has two %s conversions but no arguments
// are visible and the call is not closed; the embedded listing numbers skip
// 7776-7777, so the argument lines appear to be missing from this listing.
7773 void CSGInterface::print_prompt()
7774 {
7775  SG_PRINT("%sshogun%s >> ",
7778 }
7779 
7781 // legacy-related methods
7783 
7784 char* CSGInterface::get_str_from_str_or_direct(int32_t& len)
7785 {
7786  if (m_legacy_strptr)
7787  return get_str_from_str(len);
7788  else
7789  return get_string(len);
7790 }
7791 
7792 int32_t CSGInterface::get_int_from_int_or_str()
7793 {
7794  if (m_legacy_strptr)
7795  {
7796  int32_t len=0;
7797  char* str=get_str_from_str(len);
7798  int32_t val=strtol(str, NULL, 10);
7799 
7800  SG_FREE(str);
7801  return val;
7802  }
7803  else
7804  return get_int();
7805 }
7806 
7807 float64_t CSGInterface::get_real_from_real_or_str()
7808 {
7809  if (m_legacy_strptr)
7810  {
7811  int32_t len=0;
7812  char* str=get_str_from_str(len);
7813  float64_t val=strtod(str, NULL);
7814 
7815  SG_FREE(str);
7816  return val;
7817  }
7818  else
7819  return get_real();
7820 }
7821 
7822 bool CSGInterface::get_bool_from_bool_or_str()
7823 {
7824  if (m_legacy_strptr)
7825  {
7826  int32_t len=0;
7827  char* str=get_str_from_str(len);
7828  bool val=strtol(str, NULL, 10)!=0;
7829 
7830  SG_FREE(str);
7831  return val;
7832  }
7833  else
7834  return get_bool();
7835 }
7836 
7837 void CSGInterface::get_vector_from_int_vector_or_str(int32_t*& vector, int32_t& len)
7838 {
7839  if (m_legacy_strptr)
7840  {
7841  len=get_vector_len_from_str(len);
7842  if (len==0)
7843  {
7844  vector=NULL;
7845  return;
7846  }
7847 
7848  vector=SG_MALLOC(int32_t, len);
7849  char* str=NULL;
7850  int32_t slen=0;
7851  for (int32_t i=0; i<len; i++)
7852  {
7853  str=get_str_from_str(slen);
7854  vector[i]=strtol(str, NULL, 10);
7855  //SG_DEBUG("vec[%d]: %d\n", i, vector[i])
7856  SG_FREE(str);
7857  }
7858  }
7859  else
7860  get_vector(vector, len);
7861 }
7862 
7863 void CSGInterface::get_vector_from_real_vector_or_str(
7864  float64_t*& vector, int32_t& len)
7865 {
7866  if (m_legacy_strptr)
7867  {
7868  len=get_vector_len_from_str(len);
7869  if (len==0)
7870  {
7871  vector=NULL;
7872  return;
7873  }
7874 
7875  vector=SG_MALLOC(float64_t, len);
7876  char* str=NULL;
7877  int32_t slen=0;
7878  for (int32_t i=0; i<len; i++)
7879  {
7880  str=get_str_from_str(slen);
7881  vector[i]=strtod(str, NULL);
7882  //SG_DEBUG("vec[%d]: %f\n", i, vector[i])
7883  SG_FREE(str);
7884  }
7885  }
7886  else
7887  get_vector(vector, len);
7888 }
7889 
7890 int32_t CSGInterface::get_vector_len_from_str(int32_t expected_len)
7891 {
7892  int32_t num_args=get_num_args_in_str();
7893 
7894  if (expected_len==0 || num_args==expected_len)
7895  return num_args;
7896  else if (num_args==2*expected_len)
7897  {
7898  // special case for position_weights; a bit shaky...
7899  return expected_len;
7900  }
7901  else
7902  SG_ERROR("Expected vector length %d does not match actual length %d.\n", expected_len, num_args)
7903 
7904  return 0;
7905 }
7906 
7907 char* CSGInterface::get_str_from_str(int32_t& len)
7908 {
7909  if (!m_legacy_strptr)
7910  return NULL;
7911 
7912  int32_t i=0;
7913  while (m_legacy_strptr[i]!='\0' && !isspace(m_legacy_strptr[i]))
7914  i++;
7915 
7916  len=i;
7917  char* str=SG_MALLOC(char, len+1);
7918  for (i=0; i<len; i++)
7919  str[i]=m_legacy_strptr[i];
7920  str[len]='\0';
7921 
7922  // move legacy strptr
7923  if (m_legacy_strptr[len]=='\0')
7924  m_legacy_strptr=NULL;
7925  else
7926  {
7927  m_legacy_strptr=m_legacy_strptr+len;
7928  m_legacy_strptr=SGIO::skip_spaces(m_legacy_strptr);
7929  }
7930 
7931  return str;
7932 }
7933 
7934 int32_t CSGInterface::get_num_args_in_str()
7935 {
7936  if (!m_legacy_strptr)
7937  return 0;
7938 
7939  int32_t count=0;
7940  int32_t i=0;
7941  bool in_arg=false;
7942  while (m_legacy_strptr[i]!='\0')
7943  {
7944  if (!isspace(m_legacy_strptr[i]) && !in_arg)
7945  {
7946  count++;
7947  in_arg=true;
7948  }
7949  else if (isspace(m_legacy_strptr[i]) && in_arg)
7950  in_arg=false;
7951 
7952  i++;
7953  }
7954 
7955  return count;
7956 }
7957 
7959 // handler
7961 
// Top-level dispatcher: obtains the command name from
// interface->get_command() and calls the handler of every sg_methods entry
// whose command matches it (compared with strmatch), stopping at the first
// handler that returns true.
// A handler returning false leads to SG_ERROR with the usage text when the
// entry has a usage_prefix, or with "Non-supported command" otherwise.
// If no handler succeeded, SG_ERROR reports an unknown command.
// Returns true if a handler succeeded, false otherwise (reached only if
// SG_ERROR returns).
//
// NOTE(review): the embedded listing numbers skip values (7968, 7981, 7983,
// 7990, ..., 8015, 8020, 8022); both #ifndef WIN32 sections are empty here
// and several SG_DEBUG/SG_ERROR calls have more %s conversions than visible
// arguments and are not closed. Lines appear to be missing from this
// listing.
// NOTE(review): group entries in sg_methods carry a NULL method (e.g.
// { "Features", NULL, NULL, NULL }); the member-pointer call below is made
// without checking for that - confirm a group name can never arrive here
// as a command.
7962 bool CSGInterface::handle()
7963 {
7964  int32_t len=0;
7965  bool success=false;
7966 
7967 #ifndef WIN32
7969 #endif
7970 
7971  char* command=NULL;
7972  command=interface->get_command(len);
7973 
7974  SG_DEBUG("command: %s, nrhs %d\n", command, m_nrhs)
7975  int32_t i=0;
// sg_methods is terminated by an entry whose command is NULL
7976  while (sg_methods[i].command)
7977  {
7978  if (strmatch(command, sg_methods[i].command))
7979  {
7980  SG_DEBUG("found command %s%s%s\n",
7982  sg_methods[i].command,
7984 
7985  if (!(interface->*(sg_methods[i].method))())
7986  {
7987  if (sg_methods[i].usage_prefix)
7988  {
7989  SG_ERROR("Usage: %s%s%s\n\n\t%s%s%s%s%s\n",
7991  sg_methods[i].command,
7993  sg_methods[i].usage_prefix,
7995  sg_methods[i].command,
7997  sg_methods[i].usage_suffix);
7998  }
7999  else
8000  SG_ERROR("Non-supported command %s%s%s.\n",
8002  sg_methods[i].command,
8004  }
8005  else
8006  {
8007  success=true;
8008  break;
8009  }
8010  }
8011  i++;
8012  }
8013 
8014 #ifndef WIN32
8016 #endif
8017 
8018  if (!success)
8019  SG_ERROR("Unknown command %s%s%s.\n",
8021  command,
8023 
8024  SG_FREE(command);
8025  return success;
8026 }

SHOGUN Machine Learning Toolbox - Documentation