SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
SGInterface.cpp
Go to the documentation of this file.
4 
5 #include <shogun/lib/config.h>
6 #include <shogun/lib/DataType.h>
7 #include <shogun/lib/SGNDArray.h>
8 #include <shogun/lib/common.h>
11 #include <shogun/lib/Hash.h>
12 #include <shogun/lib/Map.h>
13 #include <shogun/lib/Signal.h>
14 
16 #include <shogun/lib/external/pr_loqo.h>
31 
33 
34 #include <shogun/structure/Plif.h>
40 
41 #include <ctype.h>
42 
43 using namespace shogun;
44 
45 CSGInterface* interface=NULL;
47 
// Usage-string helpers for the sg_methods[] command table.
// Every USAGE* macro expands to TWO comma-separated string literals, so a
// single macro use fills two consecutive initializer slots of a table entry
// (presumably the text shown before and after the command name -- confirm
// against the code that prints the help text).
// The `method` argument is accepted by all four macros but does not appear in
// any of the expansions below.
//   USAGE(method)            - no inputs, no outputs
//   USAGE_I(method, in)      - inputs only
//   USAGE_O(method, out)     - outputs only
//   USAGE_IO(method, in, out) - inputs and outputs
//   USAGE_COMMA              - separator placed between argument names
//   USAGE_STR                - quote character wrapped around literal choices
48 #if defined(HAVE_CMDLINE)
// HAVE_CMDLINE: no sg('...') wrapper, arguments separated by a blank, no quotes.
49 #define USAGE(method) "", ""
50 #define USAGE_I(method, in) "", " " in ""
51 #define USAGE_O(method, out) "" out " = ", ""
52 #define USAGE_IO(method, in, out) "" out " = ", " " in ""
53 #define USAGE_COMMA " "
54 #define USAGE_STR ""
55 #elif defined(HAVE_R)
// HAVE_R: sg('...') call syntax, outputs assigned with "<-".
56 #define USAGE(method) "sg('", "')"
57 #define USAGE_I(method, in) "sg('", "', " in ")"
58 #define USAGE_O(method, out) "[" out "] <- sg('", "')"
59 #define USAGE_IO(method, in, out) "[" out "] <- sg('", "', " in ")"
60 #define USAGE_COMMA ", "
61 #define USAGE_STR "'"
62 #else
// Fallback for every other build: sg('...') call syntax, outputs assigned with "=".
63 #define USAGE(method) "sg('", "')"
64 #define USAGE_I(method, in) "sg('", "', " in ")"
65 #define USAGE_O(method, out) "[" out "]=sg('", "')"
66 #define USAGE_IO(method, in, out) "[" out "]=sg('", "', " in ")"
67 #define USAGE_COMMA ", "
68 #define USAGE_STR "'"
69 #endif
70 
71 CSGInterfaceMethod sg_methods[]=
72 {
73  { "Features", NULL, NULL, NULL },
74 #ifdef USE_GPL_SHOGUN
75  {
76  N_PR_LOQO,
77  (&CSGInterface::cmd_pr_loqo),
79  "'Var1', Var1, 'Var2', Var2", "results")
80  },
81 #endif //USE_GPL_SHOGUN
82  {
84  (&CSGInterface::cmd_load_features),
86  "filename" USAGE_COMMA "feature_class" USAGE_COMMA "type" USAGE_COMMA "target[" USAGE_COMMA "size[" USAGE_COMMA "comp_features]]")
87  },
88  {
90  (&CSGInterface::cmd_save_features),
91  USAGE_I(N_SAVE_FEATURES, "filename" USAGE_COMMA "type" USAGE_COMMA "target")
92  },
93  {
95  (&CSGInterface::cmd_clean_features),
97  },
98  {
100  (&CSGInterface::cmd_get_features),
101  USAGE_IO(N_GET_FEATURES, USAGE_STR "TRAIN|TEST" USAGE_STR, "features")
102  },
103  {
105  (&CSGInterface::cmd_add_features),
107  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
108  },
109  {
111  (&CSGInterface::cmd_add_multiple_features),
113  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "repetitions" USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
114  },
115  {
117  (&CSGInterface::cmd_add_dotfeatures),
119  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
120  },
121  {
123  (&CSGInterface::cmd_set_features),
125  USAGE_STR "TRAIN|TEST" USAGE_STR
126  USAGE_COMMA "features["
127  USAGE_COMMA "DNABINFILE|<ALPHABET>]["
128  USAGE_COMMA "[from_position_list|slide_window]"
129  USAGE_COMMA "window size"
130  USAGE_COMMA "[position_list|shift]"
131  USAGE_COMMA "skip")
132  },
133  {
135  (&CSGInterface::cmd_set_reference_features),
137  },
138  {
140  (&CSGInterface::cmd_del_last_features),
142  },
143  {
144  N_CONVERT,
145  (&CSGInterface::cmd_convert),
146  USAGE_I(N_CONVERT, USAGE_STR "TRAIN|TEST" USAGE_STR
147  USAGE_COMMA "from_class"
148  USAGE_COMMA "from_type"
149  USAGE_COMMA "to_class"
150  USAGE_COMMA "to_type["
151  USAGE_COMMA "order"
152  USAGE_COMMA "start"
153  USAGE_COMMA "gap"
154  USAGE_COMMA "reversed]")
155  },
156  {
157  N_RESHAPE,
158  (&CSGInterface::cmd_reshape),
159  USAGE_I(N_RESHAPE, USAGE_STR "TRAIN|TEST"
160  USAGE_COMMA "num_feat"
161  USAGE_COMMA "num_vec")
162  },
163  {
165  (&CSGInterface::cmd_load_labels),
166  USAGE_I(N_LOAD_LABELS, "filename"
167  USAGE_COMMA USAGE_STR "TRAIN|TARGET" USAGE_STR)
168  },
169  {
170  N_SET_LABELS,
171  (&CSGInterface::cmd_set_labels),
173  USAGE_COMMA "labels")
174  },
175  {
176  N_GET_LABELS,
177  (&CSGInterface::cmd_get_labels),
178  USAGE_IO(N_GET_LABELS, USAGE_STR "TRAIN|TEST" USAGE_STR, "labels")
179  },
180 
181 
182  { "Kernel", NULL, NULL },
183  {
185  (&CSGInterface::cmd_set_kernel_normalization),
186  USAGE_I(N_SET_KERNEL_NORMALIZATION, "IDENTITY|AVGDIAG|SQRTDIAG|FIRSTELEMENT|VARIANCE|ZEROMEANCENTER"
187  USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
188  },
189  {
190  N_SET_KERNEL,
191  (&CSGInterface::cmd_set_kernel),
192  USAGE_I(N_SET_KERNEL, "type" USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
193  },
194  {
195  N_ADD_KERNEL,
196  (&CSGInterface::cmd_add_kernel),
197  USAGE_I(N_ADD_KERNEL, "weight" USAGE_COMMA "kernel-specific parameters")
198  },
199  {
201  (&CSGInterface::cmd_del_last_kernel),
203  },
204  {
206  (&CSGInterface::cmd_init_kernel),
208  },
209  {
211  (&CSGInterface::cmd_clean_kernel),
213  },
214  {
216  (&CSGInterface::cmd_save_kernel),
217  USAGE_I(N_SAVE_KERNEL, "filename" USAGE_COMMA USAGE_STR "TRAIN|TEST" USAGE_STR)
218  },
219  {
221  (&CSGInterface::cmd_get_kernel_matrix),
222  USAGE_IO(N_GET_KERNEL_MATRIX, "[" USAGE_STR "TRAIN|TEST" USAGE_STR, "K]")
223  },
224  {
226  (&CSGInterface::cmd_set_WD_position_weights),
228  },
229  {
231  (&CSGInterface::cmd_get_subkernel_weights),
233  },
234  {
236  (&CSGInterface::cmd_set_subkernel_weights),
238  },
239  {
241  (&CSGInterface::cmd_set_subkernel_weights_combined),
243  },
244  {
246  (&CSGInterface::cmd_get_dotfeature_weights_combined),
248  },
249  {
251  (&CSGInterface::cmd_set_dotfeature_weights_combined),
253  },
254  {
256  (&CSGInterface::cmd_set_last_subkernel_weights),
258  },
259  {
261  (&CSGInterface::cmd_get_WD_position_weights),
263  },
264  {
266  (&CSGInterface::cmd_get_last_subkernel_weights),
268  },
269  {
271  (&CSGInterface::cmd_compute_by_subkernels),
273  },
274  {
276  (&CSGInterface::cmd_init_kernel_optimization),
278  },
279  {
281  (&CSGInterface::cmd_get_kernel_optimization),
283  },
284  {
286  (&CSGInterface::cmd_delete_kernel_optimization),
288  },
289  {
291  (&CSGInterface::cmd_use_diagonal_speedup),
293  },
294  {
296  (&CSGInterface::cmd_set_kernel_optimization_type),
297  USAGE_I(N_SET_KERNEL_OPTIMIZATION_TYPE, USAGE_STR "FASTBUTMEMHUNGRY|SLOWBUTMEMEFFICIENT" USAGE_STR)
298  },
299  {
300  N_SET_SOLVER,
301  (&CSGInterface::cmd_set_solver),
302  USAGE_I(N_SET_SOLVER, USAGE_STR "AUTO|CPLEX|GLPK|INTERNAL" USAGE_STR)
303  },
304  {
306  (&CSGInterface::cmd_set_constraint_generator),
307  USAGE_I(N_SET_CONSTRAINT_GENERATOR, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
308  "|SVMLIGHT|LIGHT|SVMLIGHT_ONECLASS|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
309  USAGE_STR)
310  },
311  {
313  (&CSGInterface::cmd_set_prior_probs),
314  USAGE_I(N_SET_PRIOR_PROBS, USAGE_STR "pos probs, neg_probs" USAGE_STR)
315  },
316  {
318  (&CSGInterface::cmd_set_prior_probs_from_labels),
320  },
321 #ifdef USE_SVMLIGHT
322  {
324  (&CSGInterface::cmd_resize_kernel_cache),
326  },
327 #endif //USE_SVMLIGHT
328 
329 
330  { "Distance", NULL, NULL },
331  {
333  (&CSGInterface::cmd_set_distance),
334  USAGE_I(N_SET_DISTANCE, "type" USAGE_COMMA "data type[" USAGE_COMMA "distance-specific parameters]")
335  },
336  {
338  (&CSGInterface::cmd_init_distance),
340  },
341  {
343  (&CSGInterface::cmd_get_distance_matrix),
345  },
346 
347 
348  { "Classifier", NULL, NULL },
349  {
350  N_CLASSIFY,
351  (&CSGInterface::cmd_classify),
352  USAGE_O(N_CLASSIFY, "result")
353  },
354  {
356  (&CSGInterface::cmd_classify),
357  USAGE_O(N_SVM_CLASSIFY, "result")
358  },
359  {
361  (&CSGInterface::cmd_classify_example),
362  USAGE_IO(N_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
363  },
364  {
366  (&CSGInterface::cmd_classify_example),
367  USAGE_IO(N_SVM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
368  },
369  {
371  (&CSGInterface::cmd_get_classifier),
372  USAGE_IO(N_GET_CLASSIFIER, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "weights")
373  },
374  {
376  (&CSGInterface::cmd_get_classifier),
377  USAGE_O(N_GET_CLUSTERING, "radi" USAGE_COMMA "centers|merge_distances" USAGE_COMMA "pairs")
378  },
379  {
380  N_NEW_SVM,
381  (&CSGInterface::cmd_new_classifier),
382  USAGE_I(N_NEW_SVM, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
383  "|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
384  "|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
385  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
386  "|LPM|LPBOOST|KNN" USAGE_STR)
387  },
388  {
390  (&CSGInterface::cmd_new_classifier),
391  USAGE_I(N_NEW_CLASSIFIER, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS"
392  "|LIBSVM|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
393  "|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
394  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
395  "|LPM|LPBOOST|KNN" USAGE_STR)
396  },
397  {
399  (&CSGInterface::cmd_new_classifier),
400  USAGE_I(N_NEW_REGRESSION, USAGE_STR "SVRLIGHT|LIBSVR|KRR" USAGE_STR)
401  },
402  {
404  (&CSGInterface::cmd_new_classifier),
405  USAGE_I(N_NEW_CLUSTERING, USAGE_STR "KMEANS|HIERARCHICAL" USAGE_STR)
406  },
407  {
409  (&CSGInterface::cmd_load_classifier),
410  USAGE_O(N_LOAD_CLASSIFIER, "filename" USAGE_COMMA "type")
411  },
412  {
414  (&CSGInterface::cmd_save_classifier),
415  USAGE_I(N_SAVE_CLASSIFIER, "filename")
416  },
417  {
419  (&CSGInterface::cmd_get_num_svms),
420  USAGE_O(N_GET_NUM_SVMS, "number of SVMs in MultiClassSVM")
421  },
422  {
423  N_GET_SVM,
424  (&CSGInterface::cmd_get_svm),
425  USAGE_IO(N_GET_SVM, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "alphas")
426  },
427  {
428  N_SET_SVM,
429  (&CSGInterface::cmd_set_svm),
430  USAGE_I(N_SET_SVM, "bias" USAGE_COMMA "alphas")
431  },
432  {
434  (&CSGInterface::cmd_set_linear_classifier),
436  },
437  {
439  (&CSGInterface::cmd_get_svm_objective),
440  USAGE_O(N_GET_SVM_OBJECTIVE, "objective")
441  },
442  {
444  (&CSGInterface::cmd_compute_svm_primal_objective),
446  },
447  {
449  (&CSGInterface::cmd_compute_svm_dual_objective),
451  },
452  {
454  (&CSGInterface::cmd_compute_svm_primal_objective),
456  },
457  {
459  (&CSGInterface::cmd_compute_mkl_dual_objective),
461  },
462  {
464  (&CSGInterface::cmd_compute_relative_mkl_duality_gap),
466  },
467  {
469  (&CSGInterface::cmd_compute_absolute_mkl_duality_gap),
471  },
472  {
474  (&CSGInterface::cmd_do_auc_maximization),
476  },
477  {
479  (&CSGInterface::cmd_set_perceptron_parameters),
480  USAGE_I(N_SET_PERCEPTRON_PARAMETERS, "learnrate" USAGE_COMMA "maxiter")
481  },
482  {
484  (&CSGInterface::cmd_train_classifier),
485  USAGE_I(N_TRAIN_CLASSIFIER, "[classifier-specific parameters]")
486  },
487  {
489  (&CSGInterface::cmd_train_classifier),
491  },
492  {
494  (&CSGInterface::cmd_train_classifier),
496  },
497  {
498  N_SVM_TRAIN,
499  (&CSGInterface::cmd_train_classifier),
500  USAGE_I(N_SVM_TRAIN, "[classifier-specific parameters]")
501  },
502  {
503  N_SVMQPSIZE,
504  (&CSGInterface::cmd_set_svm_qpsize),
505  USAGE_I(N_SVMQPSIZE, "size")
506  },
507  {
509  (&CSGInterface::cmd_set_svm_max_qpsize),
510  USAGE_I(N_SVMMAXQPSIZE, "size")
511  },
512  {
513  N_SVMBUFSIZE,
514  (&CSGInterface::cmd_set_svm_bufsize),
515  USAGE_I(N_SVMBUFSIZE, "size")
516  },
517  {
518  N_C,
519  (&CSGInterface::cmd_set_svm_C),
520  USAGE_I(N_C, "C1[" USAGE_COMMA "C2]")
521  },
522  {
524  (&CSGInterface::cmd_set_svm_epsilon),
525  USAGE_I(N_SVM_EPSILON, "epsilon")
526  },
527  {
529  (&CSGInterface::cmd_set_svr_tube_epsilon),
530  USAGE_I(N_SVR_TUBE_EPSILON, "tube_epsilon")
531  },
532  {
533  N_SVM_NU,
534  (&CSGInterface::cmd_set_svm_nu),
535  USAGE_I(N_SVM_NU, "nu")
536  },
537  {
539  (&CSGInterface::cmd_set_svm_mkl_parameters),
540  USAGE_I(N_MKL_PARAMETERS, "weight_epsilon" USAGE_COMMA "C_MKL [" USAGE_COMMA "mkl_norm ]")
541  },
542  {
543  N_ENT_LAMBDA,
544  (&CSGInterface::cmd_set_elasticnet_lambda),
545  USAGE_I(N_ENT_LAMBDA, "ent_lambda")
546  },
547  {
549  (&CSGInterface::cmd_set_mkl_block_norm),
550  USAGE_I(N_MKL_BLOCK_NORM, "mkl_block_norm")
551  },
552  {
554  (&CSGInterface::cmd_set_max_train_time),
555  USAGE_I(N_SVM_MAX_TRAIN_TIME, "max_train_time")
556  },
557  {
559  (&CSGInterface::cmd_set_svm_shrinking_enabled),
560  USAGE_I(N_USE_SHRINKING, "enable_shrinking")
561  },
562  {
564  (&CSGInterface::cmd_set_svm_batch_computation_enabled),
565  USAGE_I(N_USE_BATCH_COMPUTATION, "enable_batch_computation")
566  },
567  {
568  N_USE_LINADD,
569  (&CSGInterface::cmd_set_svm_linadd_enabled),
570  USAGE_I(N_USE_LINADD, "enable_linadd")
571  },
572  {
574  (&CSGInterface::cmd_set_svm_bias_enabled),
575  USAGE_I(N_SVM_USE_BIAS, "enable_bias")
576  },
577  {
579  (&CSGInterface::cmd_set_mkl_interleaved_enabled),
580  USAGE_I(N_MKL_USE_INTERLEAVED_OPTIMIZATION, "enable_interleaved_optimization")
581  },
582  {
583  N_KRR_TAU,
584  (&CSGInterface::cmd_set_krr_tau),
585  USAGE_I(N_KRR_TAU, "tau")
586  },
587 
588 
589  { "Preprocessors", NULL, NULL },
590  {
592  (&CSGInterface::cmd_add_preproc),
593  USAGE_I(N_ADD_PREPROC, "preproc[, preproc-specific parameters]")
594  },
595  {
597  (&CSGInterface::cmd_del_preproc),
599  },
600  {
602  (&CSGInterface::cmd_attach_preproc),
604  },
605  {
607  (&CSGInterface::cmd_clean_preproc),
609  },
610 
611  { "Converters", NULL, NULL },
612  {
614  (&CSGInterface::cmd_set_converter),
616  },
617  {
619  (&CSGInterface::cmd_apply_converter),
620  USAGE_O(N_APPLY_CONVERTER, "conv_features")
621  },
622  {
623  N_EMBED,
624  (&CSGInterface::cmd_embed),
625  USAGE_IO(N_EMBED,"target dim","embedding")
626  },
627 
628 
629  { "HMM", NULL, NULL },
630  {
631  N_NEW_HMM,
632  (&CSGInterface::cmd_new_hmm),
633  USAGE_I(N_NEW_HMM, "N" USAGE_COMMA "M")
634  },
635  {
636  N_LOAD_HMM,
637  (&CSGInterface::cmd_load_hmm),
638  USAGE_I(N_LOAD_HMM, "filename")
639  },
640  {
641  N_SAVE_HMM,
642  (&CSGInterface::cmd_save_hmm),
643  USAGE_I(N_SAVE_HMM, "filename[" USAGE_COMMA "save_binary]")
644  },
645  {
646  N_GET_HMM,
647  (&CSGInterface::cmd_get_hmm),
649  },
650  {
651  N_APPEND_HMM,
652  (&CSGInterface::cmd_append_hmm),
654  },
655  {
657  (&CSGInterface::cmd_append_model),
658  USAGE_I(N_APPEND_MODEL, USAGE_STR "filename" USAGE_STR "[" USAGE_COMMA "base1" USAGE_COMMA "base2]")
659  },
660  {
661  N_SET_HMM,
662  (&CSGInterface::cmd_set_hmm),
664  },
665  {
666  N_SET_HMM_AS,
667  (&CSGInterface::cmd_set_hmm_as),
668  USAGE_I(N_SET_HMM_AS, "POS|NEG|TEST")
669  },
670  {
671  N_CHOP,
672  (&CSGInterface::cmd_set_chop),
673  USAGE_I(N_CHOP, "chop")
674  },
675  {
676  N_PSEUDO,
677  (&CSGInterface::cmd_set_pseudo),
678  USAGE_I(N_PSEUDO, "pseudo")
679  },
680  {
682  (&CSGInterface::cmd_load_definitions),
683  USAGE_I(N_LOAD_DEFINITIONS, "filename" USAGE_COMMA "init")
684  },
685  {
687  (&CSGInterface::cmd_hmm_classify),
688  USAGE_O(N_HMM_CLASSIFY, "result")
689  },
690  {
692  (&CSGInterface::cmd_one_class_linear_hmm_classify),
694  },
695  {
697  (&CSGInterface::cmd_one_class_hmm_classify),
699  },
700  {
702  (&CSGInterface::cmd_one_class_hmm_classify_example),
703  USAGE_IO(N_ONE_CLASS_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
704  },
705  {
707  (&CSGInterface::cmd_hmm_classify_example),
708  USAGE_IO(N_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
709  },
710  {
711  N_OUTPUT_HMM,
712  (&CSGInterface::cmd_output_hmm),
714  },
715  {
717  (&CSGInterface::cmd_output_hmm_defined),
719  },
720  {
722  (&CSGInterface::cmd_hmm_likelihood),
723  USAGE_O(N_HMM_LIKELIHOOD, "likelihood")
724  },
725  {
726  N_LIKELIHOOD,
727  (&CSGInterface::cmd_likelihood),
729  },
730  {
732  (&CSGInterface::cmd_save_likelihood),
733  USAGE_I(N_SAVE_LIKELIHOOD, "filename[" USAGE_COMMA "save_binary]")
734  },
735  {
737  (&CSGInterface::cmd_get_viterbi_path),
738  USAGE_IO(N_GET_VITERBI_PATH, "dim", "path" USAGE_COMMA "likelihood")
739  },
740  {
742  (&CSGInterface::cmd_viterbi_train_defined),
744  },
745  {
747  (&CSGInterface::cmd_viterbi_train),
749  },
750  {
752  (&CSGInterface::cmd_baum_welch_train),
754  },
755  {
757  (&CSGInterface::cmd_baum_welch_train_defined),
759  },
760  {
762  (&CSGInterface::cmd_baum_welch_trans_train),
764  },
765  {
767  (&CSGInterface::cmd_linear_train),
769  },
770  {
771  N_SAVE_PATH,
772  (&CSGInterface::cmd_save_path),
773  USAGE_I(N_SAVE_PATH, "filename[" USAGE_COMMA "save_binary]")
774  },
775  {
777  (&CSGInterface::cmd_convergence_criteria),
778  USAGE_I(N_CONVERGENCE_CRITERIA, "num_iterations" USAGE_COMMA "epsilon")
779  },
780  {
781  N_NORMALIZE,
782  (&CSGInterface::cmd_normalize),
783  USAGE_I(N_NORMALIZE, "[keep_dead_states]")
784  },
785  {
786  N_ADD_STATES,
787  (&CSGInterface::cmd_add_states),
788  USAGE_I(N_ADD_STATES, "states" USAGE_COMMA "value")
789  },
790  {
792  (&CSGInterface::cmd_permutation_entropy),
793  USAGE_I(N_PERMUTATION_ENTROPY, "width" USAGE_COMMA "seqnum")
794  },
795  {
797  (&CSGInterface::cmd_relative_entropy),
798  USAGE_O(N_RELATIVE_ENTROPY, "result")
799  },
800  {
801  N_ENTROPY,
802  (&CSGInterface::cmd_entropy),
803  USAGE_O(N_ENTROPY, "result")
804  },
805  {
806  (char*) N_SET_FEATURE_MATRIX,
807  (&CSGInterface::cmd_set_feature_matrix),
808  (char*) USAGE_I(N_SET_FEATURE_MATRIX, "features")
809  },
810  {
812  (&CSGInterface::cmd_set_feature_matrix_sparse),
813  (char*) USAGE_I(N_SET_FEATURE_MATRIX_SPARSE, "sp1" USAGE_COMMA "sp2" )
814  },
815  {
817  (&CSGInterface::cmd_new_plugin_estimator),
818  USAGE_I(N_NEW_PLUGIN_ESTIMATOR, "pos_pseudo" USAGE_COMMA "neg_pseudo")
819  },
820  {
822  (&CSGInterface::cmd_train_estimator),
824  },
825  {
827  (&CSGInterface::cmd_plugin_estimate_classify_example),
828  USAGE_IO(N_PLUGIN_ESTIMATE_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
829  },
830  {
832  (&CSGInterface::cmd_plugin_estimate_classify),
834  },
835  {
837  (&CSGInterface::cmd_set_plugin_estimate),
838  USAGE_I(N_SET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
839  },
840  {
842  (&CSGInterface::cmd_get_plugin_estimate),
843  USAGE_O(N_GET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
844  },
845  { "Signals", NULL, NULL },
846  {
848  (&CSGInterface::cmd_signals_set_model),
850  },
851  {
853  (&CSGInterface::cmd_signals_set_positions),
854  USAGE_I(N_SIGNALS_SET_POSITIONS, "positions")
855  },
856  {
858  (&CSGInterface::cmd_signals_set_labels),
859  USAGE_I(N_SIGNALS_SET_LABELS, "labels")
860  },
861  {
863  (&CSGInterface::cmd_signals_set_split),
864  USAGE_I(N_SIGNALS_SET_SPLIT, "split")
865  },
866  {
868  (&CSGInterface::cmd_signals_set_train_mask),
870  },
871  {
873  (&CSGInterface::cmd_signals_add_feature),
874  USAGE_I(N_SIGNALS_ADD_FEATURE, "feature")
875  },
876  {
878  (&CSGInterface::cmd_signals_add_kernel),
879  USAGE_I(N_SIGNALS_ADD_KERNEL, "kernelparam")
880  },
881  {
883  (&CSGInterface::cmd_signals_run),
884  USAGE_I(N_SIGNALS_RUN, "arg1")
885  },
886  { "Structure", NULL, NULL },
887  {
888  N_BEST_PATH,
889  (&CSGInterface::cmd_best_path),
890  USAGE_I(N_BEST_PATH, "from" USAGE_COMMA "to")
891  },
892  {
894  (&CSGInterface::cmd_best_path_2struct),
896  USAGE_COMMA "q"
897  USAGE_COMMA "cmd_trans"
898  USAGE_COMMA "seq"
899  USAGE_COMMA "pos"
900  USAGE_COMMA "genestr"
901  USAGE_COMMA "penalties"
902  USAGE_COMMA "penalty_info"
903  USAGE_COMMA "nbest"
904  USAGE_COMMA "content_weights"
905  USAGE_COMMA "segment_sum_weights",
906  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
907  },
908  {
909  (char*) N_SET_PLIF_STRUCT,
910  (&CSGInterface::cmd_set_plif_struct),
911  (char*) USAGE_I(N_SET_PLIF_STRUCT, "id"
912  USAGE_COMMA "name"
913  USAGE_COMMA "limits"
914  USAGE_COMMA "penalties"
915  USAGE_COMMA "transform"
916  USAGE_COMMA "min_value"
917  USAGE_COMMA "max_value"
918  USAGE_COMMA "use_cache"
919  USAGE_COMMA "use_svm")
920  },
921  {
922  (char*) N_GET_PLIF_STRUCT,
923  (&CSGInterface::cmd_get_plif_struct),
924  (char*) USAGE_O(N_GET_PLIF_STRUCT, "id"
925  USAGE_COMMA "name"
926  USAGE_COMMA "limits"
927  USAGE_COMMA "penalties"
928  USAGE_COMMA "transform"
929  USAGE_COMMA "min_value"
930  USAGE_COMMA "max_value"
931  USAGE_COMMA "use_cache"
932  USAGE_COMMA "use_svm")
933  },
934  {
935  (char*) N_PRECOMPUTE_SUBKERNELS,
936  (&CSGInterface::cmd_precompute_subkernels),
938  },
939  {
941  (&CSGInterface::cmd_precompute_content_svms),
942  (char*) USAGE_I(N_PRECOMPUTE_CONTENT_SVMS, "sequence"
943  USAGE_COMMA "position_list"
944  USAGE_COMMA "weights")
945  },
946  {
947  (char*) N_GET_LIN_FEAT,
948  (&CSGInterface::cmd_get_lin_feat),
949  (char*) USAGE_O(N_GET_LIN_FEAT, "lin_feat")
950  },
951  {
952  (char*) N_SET_LIN_FEAT,
953  (&CSGInterface::cmd_set_lin_feat),
954  (char*) USAGE_I(N_SET_LIN_FEAT, "lin_feat")
955  },
956  {
957  (char*) N_INIT_DYN_PROG,
958  (&CSGInterface::cmd_init_dyn_prog),
959  (char*) USAGE_I(N_INIT_DYN_PROG, "num_svms")
960  },
961  {
962  (char*) N_CLEAN_UP_DYN_PROG,
963  (&CSGInterface::cmd_clean_up_dyn_prog),
964  (char*) USAGE(N_CLEAN_UP_DYN_PROG)
965  },
966  {
967  (char*) N_INIT_INTRON_LIST,
968  (&CSGInterface::cmd_init_intron_list),
969  (char*) USAGE_I(N_INIT_INTRON_LIST, "start_positions"
970  USAGE_COMMA "end_positions"
971  USAGE_COMMA "quality")
972  },
973  {
975  (&CSGInterface::cmd_precompute_tiling_features),
976  (char*) USAGE_I(N_PRECOMPUTE_TILING_FEATURES, "intensities"
977  USAGE_COMMA "probe_pos"
978  USAGE_COMMA "tiling_plif_ids")
979  },
980  {
982  (&CSGInterface::cmd_long_transition_settings),
983  (char*) USAGE_I(N_LONG_TRANSITION_SETTINGS, "use_long_transitions"
984  USAGE_COMMA "threshold"
985  USAGE_COMMA "max_len")
986  },
987 
988  {
989  (char*) N_SET_MODEL,
990  (&CSGInterface::cmd_set_model),
991  (char*) USAGE_I(N_SET_MODEL, "content_weights"
992  USAGE_COMMA "transition_pointers"
993  USAGE_COMMA "use_orf"
994  USAGE_COMMA "mod_words")
995  },
996 
997  {
998  (char*) N_BEST_PATH_TRANS,
999  (&CSGInterface::cmd_best_path_trans),
1001  USAGE_COMMA "q"
1002  USAGE_COMMA "nbest"
1003  USAGE_COMMA "seq_path"
1004  USAGE_COMMA "a_trans"
1005  USAGE_COMMA "segment_loss",
1006  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
1007  },
1008  {
1010  (&CSGInterface::cmd_best_path_trans_deriv),
1012  USAGE_COMMA "my_path"
1013  USAGE_COMMA "my_pos"
1014  USAGE_COMMA "p"
1015  USAGE_COMMA "q"
1016  USAGE_COMMA "cmd_trans"
1017  USAGE_COMMA "seq"
1018  USAGE_COMMA "pos"
1019  USAGE_COMMA "genestr"
1020  USAGE_COMMA "penalties"
1021  USAGE_COMMA "state_signals"
1022  USAGE_COMMA "penalty_info"
1023  USAGE_COMMA "dict_weights"
1024  USAGE_COMMA "mod_words ["
1025  USAGE_COMMA "segment_loss"
1026  USAGE_COMMA "segmend_ids_mask]", "p_deriv"
1027  USAGE_COMMA "q_deriv"
1028  USAGE_COMMA "cmd_deriv"
1029  USAGE_COMMA "penalties_deriv"
1030  USAGE_COMMA "my_scores"
1031  USAGE_COMMA "my_loss")
1032  },
1033 
1034  { "POIM", NULL, NULL },
1035  {
1037  (&CSGInterface::cmd_compute_POIM_WD),
1038  USAGE_IO(N_COMPUTE_POIM_WD, "max_order" USAGE_COMMA "distribution", "W")
1039  },
1040  {
1042  (&CSGInterface::cmd_get_SPEC_consensus),
1044  },
1045  {
1047  (&CSGInterface::cmd_get_SPEC_scoring),
1048  USAGE_IO(N_GET_SPEC_SCORING, "max_order", "W")
1049  },
1050  {
1052  (&CSGInterface::cmd_get_WD_consensus),
1054  },
1055  {
1057  (&CSGInterface::cmd_get_WD_scoring),
1058  USAGE_IO(N_GET_WD_SCORING, "max_order", "W")
1059  },
1060 
1061 
1062  { "Utility", NULL, NULL },
1063  {
1064  N_CRC,
1065  (&CSGInterface::cmd_crc),
1066  USAGE_IO(N_CRC, "string", "crc32")
1067  },
1068  {
1069  N_SYSTEM,
1070  (&CSGInterface::cmd_system),
1071  USAGE_I(N_SYSTEM, "system_command")
1072  },
1073  {
1074  N_EXIT,
1075  (&CSGInterface::cmd_exit),
1076  USAGE(N_EXIT)
1077  },
1078  {
1079  N_QUIT,
1080  (&CSGInterface::cmd_exit),
1081  USAGE(N_QUIT)
1082  },
1083  {
1084  N_EXEC,
1085  (&CSGInterface::cmd_exec),
1086  USAGE_I(N_EXEC, "filename")
1087  },
1088  {
1089  N_SET_OUTPUT,
1090  (&CSGInterface::cmd_set_output),
1091  USAGE_I(N_SET_OUTPUT, USAGE_STR "STDERR|STDOUT|filename" USAGE_STR)
1092  },
1093  {
1095  (&CSGInterface::cmd_set_threshold),
1096  USAGE_I(N_SET_THRESHOLD, "threshold")
1097  },
1098  {
1099  N_INIT_RANDOM,
1100  (&CSGInterface::cmd_init_random),
1101  USAGE_I(N_INIT_RANDOM, "value_to_initialize_RNG_with")
1102  },
1103  {
1104  N_THREADS,
1105  (&CSGInterface::cmd_set_num_threads),
1106  USAGE_I(N_THREADS, "num_threads")
1107  },
1108  {
1110  (&CSGInterface::cmd_translate_string),
1112  "string, order, start", "translation")
1113  },
1114  {
1115  N_CLEAR,
1116  (&CSGInterface::cmd_clear),
1117  USAGE(N_CLEAR)
1118  },
1119  {
1120  N_TIC,
1121  (&CSGInterface::cmd_tic),
1122  USAGE(N_TIC)
1123  },
1124  {
1125  N_TOC,
1126  (&CSGInterface::cmd_toc),
1127  USAGE(N_TOC)
1128  },
1129  {
1130  N_PRINT,
1131  (&CSGInterface::cmd_print),
1132  USAGE_I(N_PRINT, "msg")
1133  },
1134  {
1135  N_ECHO,
1136  (&CSGInterface::cmd_echo),
1137  USAGE_I(N_ECHO, "level")
1138  },
1139  {
1140  N_LOGLEVEL,
1141  (&CSGInterface::cmd_loglevel),
1142  USAGE_I(N_LOGLEVEL, USAGE_STR "ALL|DEBUG|INFO|NOTICE|WARN|ERROR|CRITICAL|ALERT|EMERGENCY" USAGE_STR)
1143  },
1144  {
1146  (&CSGInterface::cmd_syntax_highlight),
1148  },
1149  {
1150  N_PROGRESS,
1151  (&CSGInterface::cmd_progress),
1153  },
1154  {
1155  N_GET_VERSION,
1156  (&CSGInterface::cmd_get_version),
1157  USAGE_O(N_GET_VERSION, "version")
1158  },
1159  {
1160  N_HELP,
1161  (&CSGInterface::cmd_help),
1162  USAGE(N_HELP)
1163  },
1164  {
1165  N_WHOS,
1166  (&CSGInterface::cmd_whos),
1167  USAGE(N_WHOS)
1168  },
1169  {
1171  (&CSGInterface::cmd_send_command),
1172  NULL
1173  },
1174  {
1175  N_RUN_PYTHON,
1176  (&CSGInterface::cmd_run_python),
1178  "'Var1', Var1, 'Var2', Var2,..., python_function", "results")
1179  },
1180  {
1181  N_RUN_OCTAVE,
1182  (&CSGInterface::cmd_run_octave),
1184  "'Var1', Var1, 'Var2', Var2,..., octave_function", "results")
1185  },
1186  {
1187  N_RUN_R,
1188  (&CSGInterface::cmd_run_r),
1189  USAGE_IO(N_RUN_R,
1190  "'Var1', Var1, 'Var2', Var2,..., r_function", "results")
1191  },
1192  {NULL, NULL, NULL} /* Sentinel */
1193 };
1194 
1195 
/**
 * Constructs the interface: allocates one CGUI* helper object per ui_* member
 * (each receives a pointer back to this interface), optionally prints the
 * start-up messages, and finally calls reset().
 *
 * The ui_signals helper is commented out in the initializer list and is
 * therefore never allocated.
 *
 * @param print_copyright when true, version information and the start-up
 *        messages below are printed before reset() is called
 */
1196 CSGInterface::CSGInterface(bool print_copyright)
1197 : CSGObject(),
1198  ui_classifier(new CGUIClassifier(this)),
1199  ui_distance(new CGUIDistance(this)),
1200  ui_features(new CGUIFeatures(this)),
1201  ui_hmm(new CGUIHMM(this)),
1202  ui_kernel(new CGUIKernel(this)),
1203  ui_labels(new CGUILabels(this)),
1204  ui_math(new CGUIMath(this)),
1205  ui_pluginestimate(new CGUIPluginEstimate(this)),
1206  ui_preproc(new CGUIPreprocessor(this)),
1207  ui_time(new CGUITime(this)),
1208  ui_structure(new CGUIStructure(this)),
1209  ui_converter(new CGUIConverter(this))/*,
1210 / ui_signals(new CGUISignals(this))*/
1211 {
1212  if (print_copyright)
1213  {
1214  version->print_version();
1215  SG_PRINT("( seeding random number generator with %u (seed size %d))\n",
// NOTE(review): the listing jumps from 1215 to 1217, so the two arguments for
// "%u"/"%d" and the closing ");" of the call above are not visible here --
// restore them from the upstream file before building.
1217 #ifdef USE_LOGCACHE
// NOTE(review): the third "%i" below is paired with sizeof(float64_t), which
// is a size_t rather than an int -- confirm the format specifier.
1218  SG_PRINT("initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",
1219  CMath::get_log_range(),CMath::get_log_accuracy(),sizeof(float64_t),
1220  CMath::get_log_range()*CMath::get_log_accuracy()*sizeof(float64_t)/(1024.0*1024.0));
1221 #else
// NOTE(review): no ';' follows this SG_PRINT -- presumably the macro expands
// to a complete statement; confirm.
1222  SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", CMath::get_log_range())
1223 #endif
1224  }
1225 
// reset() zeroes the argument counters and sets file_out to NULL, which the
// destructor later tests before calling fclose().
1226  reset();
1227 }
1228 
1229 CSGInterface::~CSGInterface()
1230 {
1231  delete ui_classifier;
1232  delete ui_hmm;
1233  delete ui_pluginestimate;
1234  delete ui_kernel;
1235  delete ui_preproc;
1236  delete ui_features;
1237  delete ui_labels;
1238  delete ui_math;
1239  delete ui_structure;
1240  //delete ui_signals;
1241  delete ui_time;
1242  delete ui_distance;
1243  delete ui_converter;
1244 
1245  if (file_out)
1246  fclose(file_out);
1247 }
1248 
1249 void CSGInterface::reset()
1250 {
1251  m_lhs_counter=0;
1252  m_rhs_counter=0;
1253  m_nlhs=0;
1254  m_nrhs=0;
1255  m_legacy_strptr=NULL;
1256  file_out=NULL;
1257  echo=true;
1258 }
1259 
1260 void CSGInterface::translate_arg(CSGInterface* source, CSGInterface* target)
1261 {
1262  switch (source->get_argument_type())
1263  {
1264  case SCALAR_INT:
1265  target->set_int(source->get_int());
1266  break;
1267  case SCALAR_REAL:
1268  target->set_real(source->get_real());
1269  break;
1270  case SCALAR_BOOL:
1271  target->set_bool(source->get_bool());
1272  break;
1273  case VECTOR_BOOL:
1274  {
1275  bool* v=NULL;
1276  int32_t len=0;
1277  source->get_vector(v, len);
1278  target->set_vector(v, len);
1279  SG_FREE(v);
1280  break;
1281  }
1282  case VECTOR_BYTE:
1283  {
1284  uint8_t* v=NULL;
1285  int32_t len=0;
1286  source->get_vector(v, len);
1287  target->set_vector(v, len);
1288  SG_FREE(v);
1289  break;
1290  }
1291  case VECTOR_CHAR:
1292  {
1293  char* v=NULL;
1294  int32_t len=0;
1295  source->get_vector(v, len);
1296  target->set_vector(v, len);
1297  SG_FREE(v);
1298  break;
1299  }
1300  case VECTOR_INT:
1301  {
1302  int32_t* v=NULL;
1303  int32_t len=0;
1304  source->get_vector(v, len);
1305  target->set_vector(v, len);
1306  SG_FREE(v);
1307  break;
1308  }
1309  case VECTOR_REAL:
1310  {
1311  float64_t* v=NULL;
1312  int32_t len=0;
1313  source->get_vector(v, len);
1314  target->set_vector(v, len);
1315  SG_FREE(v);
1316  break;
1317  }
1318  case VECTOR_SHORTREAL:
1319  {
1320  float32_t* v=NULL;
1321  int32_t len=0;
1322  source->get_vector(v, len);
1323  target->set_vector(v, len);
1324  SG_FREE(v);
1325  break;
1326  }
1327  case VECTOR_SHORT:
1328  {
1329  int16_t* v=NULL;
1330  int32_t len=0;
1331  source->get_vector(v, len);
1332  target->set_vector(v, len);
1333  SG_FREE(v);
1334  break;
1335  }
1336  case VECTOR_WORD:
1337  {
1338  uint16_t* v=NULL;
1339  int32_t len=0;
1340  source->get_vector(v, len);
1341  target->set_vector(v, len);
1342  SG_FREE(v);
1343  break;
1344  }
1345 
1346  case STRING_BYTE:
1347  {
1348  int32_t num_str=0;
1349  int32_t max_str_len=0;
1350  SGString<uint8_t>* strs=NULL;
1351  source->get_string_list(strs, num_str, max_str_len);
1352  target->set_string_list(strs, num_str);
1353  SG_FREE(strs);
1354  break;
1355  }
1356  case STRING_CHAR:
1357  {
1358  int32_t num_str=0;
1359  int32_t max_str_len=0;
1360  SGString<char>* strs;
1361  source->get_string_list(strs, num_str,max_str_len);
1362  target->set_string_list(strs, num_str);
1363  SG_FREE(strs);
1364  break;
1365  }
1366  case STRING_INT:
1367  {
1368  int32_t num_str=0;
1369  int32_t max_str_len=0;
1370  SGString<int32_t>* strs;
1371  source->get_string_list(strs, num_str,max_str_len);
1372  target->set_string_list(strs, num_str);
1373  SG_FREE(strs);
1374  break;
1375  }
1376  case STRING_SHORT:
1377  {
1378  int32_t num_str=0;
1379  int32_t max_str_len=0;
1380  SGString<int16_t>* strs=NULL;
1381  source->get_string_list(strs, num_str, max_str_len);
1382  target->set_string_list(strs, num_str);
1383  SG_FREE(strs);
1384  break;
1385  }
1386  case STRING_WORD:
1387  {
1388  int32_t num_str=0;
1389  int32_t max_str_len=0;
1390  SGString<uint16_t>* strs=NULL;
1391  source->get_string_list(strs, num_str, max_str_len);
1392  target->set_string_list(strs, num_str);
1393  SG_FREE(strs);
1394  break;
1395  }
1396  case DENSE_INT:
1397  {
1398  int32_t num_feat=0;
1399  int32_t num_vec=0;
1400  int32_t* fmatrix=NULL;
1401  source->get_matrix(fmatrix, num_feat, num_vec);
1402  target->set_matrix(fmatrix, num_feat, num_vec);
1403  SG_FREE(fmatrix);
1404  break;
1405  }
1406  case DENSE_REAL:
1407  {
1408  int32_t num_feat=0;
1409  int32_t num_vec=0;
1410  float64_t* fmatrix=NULL;
1411  source->get_matrix(fmatrix, num_feat, num_vec);
1412  target->set_matrix(fmatrix, num_feat, num_vec);
1413  SG_FREE(fmatrix);
1414  break;
1415  }
1416  case DENSE_SHORT:
1417  {
1418  int32_t num_feat=0;
1419  int32_t num_vec=0;
1420  int16_t* fmatrix=NULL;
1421  source->get_matrix(fmatrix, num_feat, num_vec);
1422  target->set_matrix(fmatrix, num_feat, num_vec);
1423  SG_FREE(fmatrix);
1424  break;
1425  }
1426  case DENSE_SHORTREAL:
1427  {
1428  int32_t num_feat=0;
1429  int32_t num_vec=0;
1430  float32_t* fmatrix=NULL;
1431  source->get_matrix(fmatrix, num_feat, num_vec);
1432  target->set_matrix(fmatrix, num_feat, num_vec);
1433  SG_FREE(fmatrix);
1434  break;
1435  }
1436  case DENSE_WORD:
1437  {
1438  int32_t num_feat=0;
1439  int32_t num_vec=0;
1440  uint16_t* fmatrix=NULL;
1441  source->get_matrix(fmatrix, num_feat, num_vec);
1442  target->set_matrix(fmatrix, num_feat, num_vec);
1443  SG_FREE(fmatrix);
1444  break;
1445  }
1446  /*
1447  case NDARRAY_BYTE:
1448  {
1449  uint8_t* a=NULL;
1450  int32_t* dims=NULL;
1451  int32_t num_dims=0;
1452  source->get_ndarray(a, dims, num_dims);
1453  target->set_ndarray(a, dims, num_dims);
1454  SG_FREE(a);
1455  SG_FREE(dims);
1456  break;
1457  }
1458  case NDARRAY_CHAR:
1459  {
1460  char* a=NULL;
1461  int32_t* dims=NULL;
1462  int32_t num_dims=0;
1463  source->get_ndarray(a, dims, num_dims);
1464  target->set_ndarray(a, dims, num_dims);
1465  SG_FREE(a);
1466  SG_FREE(dims);
1467  break;
1468  }
1469  case NDARRAY_INT:
1470  {
1471  int32_t* a=NULL;
1472  int32_t* dims=NULL;
1473  int32_t num_dims=0;
1474  source->get_ndarray(a, dims, num_dims);
1475  target->set_ndarray(a, dims, num_dims);
1476  SG_FREE(a);
1477  SG_FREE(dims);
1478  break;
1479  }
1480  case NDARRAY_REAL:
1481  {
1482  float64_t* a=NULL;
1483  int32_t* dims=NULL;
1484  int32_t num_dims=0;
1485  source->get_ndarray(a, dims, num_dims);
1486  target->set_ndarray(a, dims, num_dims);
1487  SG_FREE(a);
1488  SG_FREE(dims);
1489  break;
1490  }
1491  case NDARRAY_SHORTREAL:
1492  {
1493  float32_t* a=NULL;
1494  int32_t* dims=NULL;
1495  int32_t num_dims=0;
1496  source->get_ndarray(a, dims, num_dims);
1497  target->set_ndarray(a, dims, num_dims);
1498  SG_FREE(a);
1499  SG_FREE(dims);
1500  break;
1501  }
1502  case NDARRAY_SHORT:
1503  {
1504  int16_t* a=NULL;
1505  int32_t* dims=NULL;
1506  int32_t num_dims=0;
1507  source->get_ndarray(a, dims, num_dims);
1508  target->set_ndarray(a, dims, num_dims);
1509  SG_FREE(a);
1510  SG_FREE(dims);
1511  break;
1512  }
1513  case NDARRAY_WORD:
1514  {
1515  uint16_t* a=NULL;
1516  int32_t* dims=NULL;
1517  int32_t num_dims=0;
1518  source->get_ndarray(a, dims, num_dims);
1519  target->set_ndarray(a, dims, num_dims);
1520  SG_FREE(a);
1521  SG_FREE(dims);
1522  break;
1523  }*/
1524  case SPARSE_REAL:
1525  {
1526  int32_t num_feat=0;
1527  int32_t num_vec=0;
1528  SGSparseVector<float64_t>* fmatrix=NULL;
1529  source->get_sparse_matrix(fmatrix, num_feat, num_vec);
1530  int64_t nnz=0;
1531  for (int32_t i=0; i<num_vec; i++)
1532  nnz+=fmatrix[i].num_feat_entries;
1533  target->set_sparse_matrix(fmatrix, num_feat, num_vec, nnz);
1534  SG_FREE(fmatrix);
1535  break;
1536  }
1537 
1538  default:
1539  SG_ERROR("unknown return type")
1540  break;
1541  }
1542 }
1543 
1545 // commands
1547 
1548 /* Features */
1549 
1550 bool CSGInterface::cmd_load_features()
1551 {
1552  if (m_nrhs<8 || !create_return_values(0))
1553  return false;
1554 
1555  int32_t len=0;
1556  char* filename=get_str_from_str_or_direct(len);
1557  char* fclass=get_str_from_str_or_direct(len);
1558  char* type=get_str_from_str_or_direct(len);
1559  char* target=get_str_from_str_or_direct(len);
1560  int32_t size=get_int_from_int_or_str();
1561  int32_t comp_features=get_int_from_int_or_str();
1562 
1563  bool success=ui_features->load(
1564  filename, fclass, type, target, size, comp_features);
1565 
1566  SG_FREE(filename);
1567  SG_FREE(fclass);
1568  SG_FREE(type);
1569  SG_FREE(target);
1570  return success;
1571 }
1572 
1573 bool CSGInterface::cmd_save_features()
1574 {
1575  if (m_nrhs<5 || !create_return_values(0))
1576  return false;
1577 
1578  int32_t len=0;
1579  char* filename=get_str_from_str_or_direct(len);
1580  char* type=get_str_from_str_or_direct(len);
1581  char* target=get_str_from_str_or_direct(len);
1582 
1583  bool success=ui_features->save(filename, type, target);
1584 
1585  SG_FREE(filename);
1586  SG_FREE(type);
1587  SG_FREE(target);
1588  return success;
1589 }
1590 
1591 bool CSGInterface::cmd_clean_features()
1592 {
1593  if (m_nrhs<2 || !create_return_values(0))
1594  return false;
1595 
1596  int32_t len=0;
1597  char* target=get_str_from_str_or_direct(len);
1598 
1599  bool success=ui_features->clean(target);
1600 
1601  SG_FREE(target);
1602  return success;
1603 }
1604 
1605 bool CSGInterface::cmd_get_features()
1606 {
1607  if (m_nrhs!=2 || !create_return_values(1))
1608  return false;
1609 
1610  int32_t tlen=0;
1611  char* target=get_string(tlen);
1612  CFeatures* feat=NULL;
1613 
1614  if (strmatch(target, "TRAIN"))
1615  feat=ui_features->get_train_features();
1616  else if (strmatch(target, "TEST"))
1617  feat=ui_features->get_test_features();
1618  else
1619  {
1620  SG_FREE(target);
1621  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
1622  }
1623  SG_FREE(target);
1624 
1625  ASSERT(feat)
1626 
1627  switch (feat->get_feature_class())
1628  {
1629  case C_DENSE:
1630  {
1631  int32_t num_feat=0;
1632  int32_t num_vec=0;
1633 
1634  switch (feat->get_feature_type())
1635  {
1636  case F_BYTE:
1637  {
1638  uint8_t* fmatrix=((CDenseFeatures<uint8_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1639  set_matrix(fmatrix, num_feat, num_vec);
1640  break;
1641  }
1642 
1643  case F_CHAR:
1644  {
1645  char* fmatrix=((CDenseFeatures<char> *) feat)->get_feature_matrix(num_feat, num_vec);
1646  set_matrix(fmatrix, num_feat, num_vec);
1647  break;
1648  }
1649 
1650  case F_DREAL:
1651  {
1652  float64_t* fmatrix=((CDenseFeatures<float64_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1653  set_matrix(fmatrix, num_feat, num_vec);
1654  break;
1655  }
1656 
1657  case F_INT:
1658  {
1659  int32_t* fmatrix=((CDenseFeatures<int32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1660  set_matrix(fmatrix, num_feat, num_vec);
1661  break;
1662  }
1663 
1664  case F_SHORT:
1665  {
1666  int16_t* fmatrix=((CDenseFeatures<int16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1667  set_matrix(fmatrix, num_feat, num_vec);
1668  break;
1669  }
1670 
1671  case F_SHORTREAL:
1672  {
1673  float32_t* fmatrix=((CDenseFeatures<float32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1674  set_matrix(fmatrix, num_feat, num_vec);
1675  break;
1676  }
1677 
1678  case F_WORD:
1679  {
1680  uint16_t* fmatrix=((CDenseFeatures<uint16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1681  set_matrix(fmatrix, num_feat, num_vec);
1682  break;
1683  }
1684 
1685  default:
1687  }
1688  break;
1689  }
1690 
1691  case C_SPARSE:
1692  {
1693  switch (feat->get_feature_type())
1694  {
1695  case F_DREAL:
1696  {
1697  int64_t nnz=((CSparseFeatures<float64_t>*) feat)->
1698  get_num_nonzero_entries();
1699  SGSparseMatrix<float64_t> fmatrix = ((CSparseFeatures<float64_t>*) feat)->get_sparse_feature_matrix();
1700  SG_INFO("sparse matrix has %d feats, %d vecs and %d nnz elemements\n", fmatrix.num_features, fmatrix.num_vectors, nnz)
1701 
1702  set_sparse_matrix(fmatrix.sparse_matrix, fmatrix.num_features, fmatrix.num_vectors, nnz);
1703  break;
1704  }
1705 
1706  default:
1708  }
1709  break;
1710  }
1711 
1712  case C_STRING:
1713  {
1714  int32_t num_str=0;
1715  int32_t max_str_len=0;
1716  switch (feat->get_feature_type())
1717  {
1718  case F_BYTE:
1719  {
1720  SGString<uint8_t>* fmatrix=((CStringFeatures<uint8_t>*) feat)->get_features(num_str, max_str_len);
1721  set_string_list(fmatrix, num_str);
1722  break;
1723  }
1724 
1725  case F_CHAR:
1726  {
1727  SGString<char>* fmatrix=((CStringFeatures<char>*) feat)->get_features(num_str, max_str_len);
1728  set_string_list(fmatrix, num_str);
1729  break;
1730  }
1731 
1732  case F_WORD:
1733  {
1734  SGString<uint16_t>* fmatrix=((CStringFeatures<uint16_t>*) feat)->get_features(num_str, max_str_len);
1735  set_string_list(fmatrix, num_str);
1736  break;
1737  }
1738 
1739  default:
1741  }
1742  break;
1743  }
1744 
1745  case C_WD:
1746  case C_WEIGHTEDSPEC:
1747  case C_SPEC:
1748  case C_COMBINED_DOT:
1749  case C_POLY:
1750  {
1751 
1752  SGMatrix<float64_t> fmatrix = ((CDotFeatures*) feat)->get_computed_dot_feature_matrix();
1753  set_matrix(fmatrix.matrix, fmatrix.num_cols, fmatrix.num_rows);
1754  break;
1755  }
1756 
1757  default:
1759  }
1760 
1761  return true;
1762 }
1763 
1764 bool CSGInterface::cmd_add_features()
1765 {
1766  if (m_nrhs<3 || !create_return_values(0))
1767  return false;
1768 
1769  return do_set_features(true, false);
1770 }
1771 
1772 bool CSGInterface::cmd_add_multiple_features()
1773 {
1774  if ((m_nrhs!=4 && m_nrhs<5) || !create_return_values(0))
1775  return false;
1776 
1777  int32_t repetitions=get_int();
1778 
1779  ASSERT(repetitions>=1)
1780 
1781  return do_set_features(true, false, repetitions);
1782 }
1783 
1784 bool CSGInterface::cmd_add_dotfeatures()
1785 {
1786  if (m_nrhs<3 || !create_return_values(0))
1787  return false;
1788 
1789  return do_set_features(true, true);
1790 }
1791 
1792 bool CSGInterface::cmd_set_features()
1793 {
1794  if (m_nrhs<3 || !create_return_values(0))
1795  return false;
1796 
1797  return do_set_features(false, false);
1798 }
1799 
1800 bool CSGInterface::do_set_features(bool add, bool check_dot, int32_t repetitions)
1801 {
1802  int32_t tlen=0;
1803  char* target=get_string(tlen);
1804  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
1805  {
1806  SG_FREE(target);
1807  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
1808  }
1809 
1810  CFeatures* feat=NULL;
1811  int32_t num_feat=0;
1812  int32_t num_vec=0;
1813 
1814  switch (get_argument_type())
1815  {
1816  case SPARSE_REAL:
1817  {
1818  SGSparseVector<float64_t>* fmatrix=NULL;
1819  get_sparse_matrix(fmatrix, num_feat, num_vec);
1820 
1821  feat=new CSparseFeatures<float64_t>(SGSparseMatrix<float64_t>(fmatrix, num_feat, num_vec));
1822  break;
1823  }
1824 
1825  case DENSE_REAL:
1826  {
1827  float64_t* fmatrix=NULL;
1828  get_matrix(fmatrix, num_feat, num_vec);
1829 
1830  feat=new CDenseFeatures<float64_t>(0);
1831  ((CDenseFeatures<float64_t>*) feat)->
1832  set_feature_matrix(SGMatrix<float64_t>(fmatrix, num_feat, num_vec));
1833 
1834  if (m_nrhs==6)
1835  feat = create_custom_real_features((CDenseFeatures<float64_t>*) feat);
1836 
1837  break;
1838  }
1839 
1840  case DENSE_INT:
1841  {
1842  int32_t* fmatrix=NULL;
1843  get_matrix(fmatrix, num_feat, num_vec);
1844 
1845  feat=new CDenseFeatures<int32_t>(0);
1846  ((CDenseFeatures<int32_t>*) feat)->
1847  set_feature_matrix(SGMatrix<int32_t>(fmatrix, num_feat, num_vec));
1848  break;
1849  }
1850 
1851  case DENSE_SHORT:
1852  {
1853  int16_t* fmatrix=NULL;
1854  get_matrix(fmatrix, num_feat, num_vec);
1855 
1856  feat=new CDenseFeatures<int16_t>(0);
1857  ((CDenseFeatures<int16_t>*) feat)->
1858  set_feature_matrix(SGMatrix<int16_t>(fmatrix, num_feat, num_vec));
1859  break;
1860  }
1861 
1862  case DENSE_WORD:
1863  {
1864  uint16_t* fmatrix=NULL;
1865  get_matrix(fmatrix, num_feat, num_vec);
1866 
1867  feat=new CDenseFeatures<uint16_t>(0);
1868  ((CDenseFeatures<uint16_t>*) feat)->
1869  set_feature_matrix(SGMatrix<uint16_t>(fmatrix, num_feat, num_vec));
1870  break;
1871  }
1872 
1873  case DENSE_SHORTREAL:
1874  {
1875  float32_t* fmatrix=NULL;
1876  get_matrix(fmatrix, num_feat, num_vec);
1877 
1878  feat=new CDenseFeatures<float32_t>(0);
1879  ((CDenseFeatures<float32_t>*) feat)->
1880  set_feature_matrix(SGMatrix<float32_t>(fmatrix, num_feat, num_vec));
1881  break;
1882  }
1883 
1884  case STRING_CHAR:
1885  {
1886  if (m_nrhs<4)
1887  SG_ERROR("Please specify alphabet!\n")
1888 
1889  int32_t num_str=0;
1890  int32_t max_str_len=0;
1891  SGString<char>* fmatrix=NULL;
1892  get_string_list(fmatrix, num_str, max_str_len);
1893 
1894  int32_t alphabet_len=0;
1895  char* alphabet_str=get_string(alphabet_len);
1896  ASSERT(alphabet_str)
1897 
1898  if (strmatch(alphabet_str, "DNABINFILE"))
1899  {
1900  SG_FREE(alphabet_str);
1901 
1902  ASSERT(fmatrix[0].string)
1903  feat=new CStringFeatures<uint8_t>(DNA);
1904 
1905  try
1906  {
1907  ((CStringFeatures<uint8_t>*) feat)->load_ascii_file(fmatrix[0].string);
1908  }
1909  catch (...)
1910  {
1911  SG_UNREF(feat);
1912  SG_ERROR("Couldn't load DNA features from file.\n")
1913  }
1914  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1915  break;
1916  }
1917  else
1918  {
1919  bool convert_to_word=false;
1920  bool convert_to_ulong=false;
1921  CAlphabet* alphabet=NULL;
1922  if (strmatch(alphabet_str, "DNAWORD"))
1923  {
1924  alphabet=new CAlphabet(DNA);
1925  convert_to_word=true;
1926  }
1927  else if (strmatch(alphabet_str, "DNAULONG"))
1928  {
1929  alphabet=new CAlphabet(DNA);
1930  convert_to_ulong=true;
1931  }
1932  else
1933  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1934 
1935  SG_REF(alphabet);
1936  SG_FREE(alphabet_str);
1937 
1938  feat=new CStringFeatures<char>(alphabet);
1939 
1940  if (!((CStringFeatures<char>*) feat)->set_features(fmatrix, num_str, max_str_len))
1941  {
1942  SG_UNREF(alphabet);
1943  SG_UNREF(feat);
1944  SG_ERROR("Couldnt set byte string features.\n")
1945  }
1946 
1947  SG_UNREF(alphabet);
1948 
1949  if (convert_to_word || convert_to_ulong)
1950  convert_to_bitembedding(feat, convert_to_word, convert_to_ulong);
1951  }
1952 
1953  obtain_from_single_string(feat);
1954  break;
1955  }
1956 
1957  case STRING_BYTE:
1958  {
1959  if (m_nrhs<4)
1960  SG_ERROR("Please specify alphabet!\n")
1961 
1962  int32_t num_str=0;
1963  int32_t max_str_len=0;
1964  SGString<uint8_t>* fmatrix=NULL;
1965  get_string_list(fmatrix, num_str, max_str_len);
1966 
1967  int32_t alphabet_len=0;
1968  char* alphabet_str=get_string(alphabet_len);
1969  ASSERT(alphabet_str)
1970  CAlphabet* alphabet=NULL;
1971  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1972  SG_FREE(alphabet_str);
1973 
1974  feat=new CStringFeatures<uint8_t>(alphabet);
1975  if (!((CStringFeatures<uint8_t>*) feat)->set_features(fmatrix, num_str, max_str_len))
1976  {
1977  SG_UNREF(alphabet);
1978  SG_UNREF(feat);
1979  SG_ERROR("Couldnt set byte string features.\n")
1980  }
1981  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1982  break;
1983  }
1984 
1985  default:
1986  SG_ERROR("Wrong argument type %d.\n", get_argument_type())
1987  }
1988 
1989  if (check_dot && !feat->has_property(FP_DOT))
1990  {
1991  SG_UNREF(feat);
1992  SG_ERROR("Feature type not supported by DOT Features\n")
1993  }
1994 
1995  if (strmatch(target, "TRAIN"))
1996  {
1997  if (!add)
1998  ui_features->set_train_features(feat);
1999  else if (check_dot)
2000  {
2001  for (int32_t i=0; i<repetitions; i++)
2002  ui_features->add_train_dotfeatures((CDotFeatures*) feat);
2003  }
2004  else
2005  {
2006  for (int32_t i=0; i<repetitions; i++)
2007  ui_features->add_train_features(feat);
2008  }
2009  }
2010  else
2011  {
2012  if (!add)
2013  ui_features->set_test_features(feat);
2014  else if (check_dot)
2015  {
2016  for (int32_t i=0; i<repetitions; i++)
2017  ui_features->add_test_dotfeatures((CDotFeatures*) feat);
2018  }
2019  else
2020  {
2021  for (int32_t i=0; i<repetitions; i++)
2022  ui_features->add_test_features(feat);
2023  }
2024  }
2025 
2026  SG_FREE(target);
2027 
2028  return true;
2029 }
2030 
2031 bool CSGInterface::cmd_set_reference_features()
2032 {
2033  if (m_nrhs<3 || !create_return_values(0))
2034  return false;
2035 
2036  int32_t len=0;
2037  char* target=get_str_from_str_or_direct(len);
2038 
2039  bool success=ui_features->set_reference_features(target);
2040 
2041  SG_FREE(target);
2042  return success;
2043 }
2044 
2045 bool CSGInterface::cmd_del_last_features()
2046 {
2047  if (m_nrhs<2 || !create_return_values(0))
2048  return false;
2049 
2050  int32_t len=0;
2051  char* target=get_str_from_str_or_direct(len);
2052  bool success=ui_features->del_last_feature_obj(target);
2053 
2054  SG_FREE(target);
2055  return success;
2056 }
2057 
2058 bool CSGInterface::cmd_convert()
2059 {
2060  if (m_nrhs<5 || !create_return_values(0))
2061  return false;
2062 
2063  int32_t len=0;
2064  char* target=get_str_from_str_or_direct(len);
2065  CFeatures* features=ui_features->get_convert_features(target);
2066  if (!features)
2067  {
2068  SG_FREE(target);
2069  SG_ERROR("No \"%s\" features available.\n", target)
2070  }
2071 
2072  char* from_class=get_str_from_str_or_direct(len);
2073  char* from_type=get_str_from_str_or_direct(len);
2074  char* to_class=get_str_from_str_or_direct(len);
2075  char* to_type=get_str_from_str_or_direct(len);
2076 
2077  CFeatures* result=NULL;
2078  if (strmatch(from_class, "SIMPLE"))
2079  {
2080  if (strmatch(from_type, "REAL"))
2081  {
2082  if (strmatch(to_class, "SPARSE") &&
2083  strmatch(to_type, "REAL"))
2084  {
2085  result=ui_features->convert_simple_real_to_sparse_real(
2086  ((CDenseFeatures<float64_t>*) features));
2087  }
2088  else
2090  } // from_type REAL
2091 
2092  else if (strmatch(from_type, "CHAR"))
2093  {
2094  if (strmatch(to_class, "STRING") &&
2095  strmatch(to_type, "CHAR"))
2096  {
2097  result=ui_features->convert_simple_char_to_string_char(
2098  ((CDenseFeatures<char>*) features));
2099  }
2100  else if (strmatch(to_class, "SIMPLE"))
2101  {
2102  if (strmatch(to_type, "ALIGN") && m_nrhs==8)
2103  {
2104  float64_t gap_cost=get_real_from_real_or_str();
2105  result=ui_features->convert_simple_char_to_simple_align(
2106  (CDenseFeatures<char>*) features, gap_cost);
2107  }
2108  else
2110  }
2111  else
2113  } // from_type CHAR
2114 
2115  else if (strmatch(from_type, "WORD"))
2116  {
2117  if (strmatch(to_class, "SIMPLE") &&
2118  strmatch(to_type, "SALZBERG"))
2119  {
2120  result=ui_features->convert_simple_word_to_simple_salzberg(
2121  (CDenseFeatures<uint16_t>*) features);
2122  }
2123  else
2125  } // from_type WORD
2126 
2127  else
2129  } // from_class SIMPLE
2130 
2131  else if (strmatch(from_class, "SPARSE"))
2132  {
2133  if (strmatch(from_type, "REAL"))
2134  {
2135  if (strmatch(to_class, "SIMPLE") &&
2136  strmatch(to_type, "REAL"))
2137  {
2138  result=ui_features->convert_sparse_real_to_simple_real(
2139  (CSparseFeatures<float64_t>*) features);
2140  }
2141  else
2143  } // from_type REAL
2144  else
2146  } // from_class SPARSE
2147 
2148  else if (strmatch(from_class, "STRING"))
2149  {
2150  if (strmatch(from_type, "CHAR"))
2151  {
2152  if (strmatch(to_class, "STRING"))
2153  {
2154  int32_t order=1;
2155  int32_t start=0;
2156  int32_t gap=0;
2157  char rev='f';
2158 
2159  if (m_nrhs>6)
2160  {
2161  order=get_int_from_int_or_str();
2162 
2163  if (m_nrhs>7)
2164  {
2165  start=get_int_from_int_or_str();
2166 
2167  if (m_nrhs>8)
2168  {
2169  gap=get_int_from_int_or_str();
2170 
2171  if (m_nrhs>9)
2172  {
2173  char* rev_str=get_str_from_str_or_direct(len);
2174  if (rev_str)
2175  rev=rev_str[0];
2176 
2177  SG_FREE(rev_str);
2178  }
2179  }
2180  }
2181  }
2182 
2183  if (strmatch(to_type, "BYTE"))
2184  {
2185  result=ui_features->convert_string_char_to_string_generic<char,uint8_t>(
2186  (CStringFeatures<char>*) features, order, start,
2187  gap, rev);
2188  }
2189  else if (strmatch(to_type, "WORD"))
2190  {
2191  result=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2192  (CStringFeatures<char>*) features, order, start,
2193  gap, rev);
2194  }
2195  else if (strmatch(to_type, "ULONG"))
2196  {
2197  result=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2198  (CStringFeatures<char>*) features, order, start,
2199  gap, rev);
2200  }
2201  else
2203  }
2204  else
2206  } // from_type CHAR
2207 
2208  else if (strmatch(from_type, "BYTE"))
2209  {
2210  if (strmatch(to_class, "STRING"))
2211  {
2212  int32_t order=1;
2213  int32_t start=0;
2214  int32_t gap=0;
2215  char rev='f';
2216 
2217  if (m_nrhs>6)
2218  {
2219  order=get_int_from_int_or_str();
2220 
2221  if (m_nrhs>7)
2222  {
2223  start=get_int_from_int_or_str();
2224 
2225  if (m_nrhs>8)
2226  {
2227  gap=get_int_from_int_or_str();
2228 
2229  if (m_nrhs>9)
2230  {
2231  char* rev_str=get_str_from_str_or_direct(len);
2232  if (rev_str)
2233  rev=rev_str[0];
2234 
2235  SG_FREE(rev_str);
2236  }
2237  }
2238  }
2239  }
2240 
2241  if (strmatch(to_type, "WORD"))
2242  {
2243  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint16_t>(
2244  (CStringFeatures<uint8_t>*) features, order, start,
2245  gap, rev);
2246  }
2247  else if (strmatch(to_type, "ULONG"))
2248  {
2249  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint64_t>(
2250  (CStringFeatures<uint8_t>*) features, order, start,
2251  gap, rev);
2252  }
2253  else
2255  }
2256  else
2258  } // from_type uint8_t
2259 
2260  else if (strmatch(from_type, "WORD"))
2261  {
2262  if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "TOP"))
2263  {
2264  result=ui_features->convert_string_word_to_simple_top(
2265  (CStringFeatures<uint16_t>*) features);
2266  }
2267  else if (strmatch(to_class, "SPEC") && strmatch(to_type, "WORD") && m_nrhs==7)
2268  {
2269  bool use_norm=get_bool();
2270  result=ui_features->convert_string_byte_to_spec_word((CStringFeatures<uint16_t>*) features, use_norm);
2271 
2272  }
2273  else
2275  } // from_type WORD
2276 
2277  else if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "FK"))
2278  {
2279  result=ui_features->convert_string_word_to_simple_fk(
2280  (CStringFeatures<uint16_t>*) features);
2281  } // to_type FK
2282 
2283  else
2285 
2286  } // from_class STRING
2287 
2288  if (result && ui_features->set_convert_features(result, target))
2289  SG_INFO("Conversion was successful.\n")
2290  else
2291  SG_ERROR("Conversion failed.\n")
2292 
2293  SG_FREE(target);
2294  SG_FREE(from_class);
2295  SG_FREE(from_type);
2296  SG_FREE(to_class);
2297  SG_FREE(to_type);
2298  return (result!=NULL);
2299 }
2300 
2301 void CSGInterface::convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong)
2302 {
2303  int32_t order=1;
2304  int32_t start=0;
2305  int32_t gap=0;
2306  char rev='f';
2307 
2308  if (m_nrhs<5)
2309  return;
2310 
2311  order=get_int();
2312  // remove arg, for parameters to come
2313  m_nrhs--;
2314 
2315  if (convert_to_word)
2316  {
2317  SG_INFO("Converting into word-bitembedding\n")
2318  features=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2319  (CStringFeatures<char>*) features, order, start, gap, rev);
2320  }
2321 
2322  if (convert_to_ulong)
2323  {
2324  SG_INFO("Converting into ulong-bitembedding\n")
2325  features=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2326  (CStringFeatures<char>*) features, order, start, gap, rev);
2327  }
2328 }
2329 
2330 void CSGInterface::obtain_from_single_string(CFeatures* features)
2331 {
2332  if (m_nrhs<5)
2333  return;
2334 
2335  int32_t len=0;
2336  char* str=get_string(len);
2337  ASSERT(str)
2338 
2339  if (strmatch(str, "from_position_list"))
2340  {
2341  obtain_from_position_list(features);
2342  }
2343  else if (strmatch(str, "slide_window"))
2344  {
2345  obtain_by_sliding_window(features);
2346  }
2347  else
2348  SG_SERROR("Unknown conversion\n")
2349 }
2350 
2351 bool CSGInterface::obtain_from_position_list(CFeatures* features)
2352 {
2353  int32_t winsize=get_int();
2354 
2355  int32_t* shifts=NULL;
2356  int32_t num_shift=0;
2357  get_vector(shifts, num_shift);
2358 
2359  int32_t skip=0;
2360  if (m_nrhs==8)
2361  skip=get_int();
2362 
2363  SG_DEBUG("winsize: %d num_shifts: %d skip: %d\n", winsize, num_shift, skip)
2364 
2365  CDynamicArray<int32_t> positions(num_shift+1);
2366 
2367  for (int32_t i=0; i<num_shift; i++)
2368  positions.set_element(shifts[i], i);
2369 
2370  if (features->get_feature_class()!=C_STRING)
2371  SG_ERROR("No string features.\n")
2372 
2373  bool success=false;
2374  switch (features->get_feature_type())
2375  {
2376  case F_CHAR:
2377  {
2378  success=(((CStringFeatures<char>*) features)->
2379  obtain_by_position_list(winsize, &positions, skip)>0);
2380  break;
2381  }
2382  case F_BYTE:
2383  {
2384  success=(((CStringFeatures<uint8_t>*) features)->
2385  obtain_by_position_list(winsize, &positions, skip)>0);
2386  break;
2387  }
2388  case F_WORD:
2389  {
2390  success=(((CStringFeatures<uint16_t>*) features)->
2391  obtain_by_position_list(winsize, &positions, skip)>0);
2392  break;
2393  }
2394  case F_ULONG:
2395  {
2396  success=(((CStringFeatures<uint64_t>*) features)->
2397  obtain_by_position_list(winsize, &positions, skip)>0);
2398  break;
2399  }
2400  default:
2401  SG_ERROR("Unsupported string features type.\n")
2402  }
2403 
2404  return success;
2405 }
2406 
2407 bool CSGInterface::obtain_by_sliding_window(CFeatures* features)
2408 {
2409  int32_t winsize=get_int();
2410  int32_t shift=get_int();
2411  int32_t skip=0;
2412 
2413  if (m_nrhs==8)
2414  skip=get_int();
2415 
2416  bool success=false;
2417 
2418  ASSERT(features)
2419  ASSERT(((CFeatures*) features)->get_feature_class()==C_STRING)
2420 
2421  switch (features->get_feature_type())
2422  {
2423  case F_CHAR:
2424  return ( ((CStringFeatures<char>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2425  case F_BYTE:
2426  return ( ((CStringFeatures<uint8_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2427  case F_WORD:
2428  return ( ((CStringFeatures<uint16_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2429  case F_ULONG:
2430  return ( ((CStringFeatures<uint64_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2431  default:
2432  SG_SERROR("Unsupported string features type.\n")
2433  return false;
2434  }
2435 
2436  return success;
2437 }
2438 
2439 bool CSGInterface::cmd_reshape()
2440 {
2441  if (m_nrhs<4 || !create_return_values(0))
2442  return false;
2443 
2444  int32_t len=0;
2445  char* target=get_str_from_str_or_direct(len);
2446  int32_t num_feat=get_int_from_int_or_str();
2447  int32_t num_vec=get_int_from_int_or_str();
2448 
2449  bool success=ui_features->reshape(target, num_feat, num_vec);
2450 
2451  SG_FREE(target);
2452  return success;
2453 }
2454 
2455 bool CSGInterface::cmd_load_labels()
2456 {
2457  if (m_nrhs<4 || !create_return_values(0))
2458  return false;
2459 
2460  int32_t len=0;
2461  char* filename=get_str_from_str_or_direct(len);
2462  char* target=get_str_from_str_or_direct(len);
2463 
2464  bool success=ui_labels->load(filename, target);
2465 
2466  SG_FREE(filename);
2467  SG_FREE(target);
2468  return success;
2469 }
2470 
2471 bool CSGInterface::cmd_set_labels()
2472 {
2473  if (m_nrhs!=3 || !create_return_values(0))
2474  return false;
2475 
2476  int32_t tlen=0;
2477  char* target=get_string(tlen);
2478  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
2479  {
2480  SG_FREE(target);
2481  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2482  }
2483 
2484  float64_t* lab=NULL;
2485  int32_t len=0;
2486  get_vector(lab, len);
2487 
2488  CLabels* labels=ui_labels->infer_labels(lab, len);
2489 
2490  SG_INFO("num labels: %d\n", labels->get_num_labels())
2491 
2492  if (strmatch(target, "TRAIN"))
2493  ui_labels->set_train_labels(labels);
2494  else if (strmatch(target, "TEST"))
2495  ui_labels->set_test_labels(labels);
2496  else
2497  {
2498  SG_FREE(target);
2499  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2500  }
2501  SG_FREE(target);
2502 
2503  return true;
2504 }
2505 
2506 bool CSGInterface::cmd_get_labels()
2507 {
2508  if (m_nrhs!=2 || !create_return_values(1))
2509  return false;
2510 
2511  int32_t tlen=0;
2512  char* target=get_string(tlen);
2513  CLabels* labels=NULL;
2514 
2515  if (strmatch(target, "TRAIN"))
2516  labels=ui_labels->get_train_labels();
2517  else if (strmatch(target, "TEST"))
2518  labels=ui_labels->get_test_labels();
2519  else
2520  {
2521  SG_FREE(target);
2522  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
2523  }
2524  SG_FREE(target);
2525 
2526  if (!labels)
2527  SG_ERROR("No labels.\n")
2528 
2529  //FIXME
2530  SGVector<float64_t> lab=((CBinaryLabels*) labels)->get_labels();
2531 
2532  set_vector(lab.vector, lab.vlen);
2533  return true;
2534 }
2535 
2536 
2539 bool CSGInterface::cmd_set_kernel_normalization()
2540 {
2541  if (m_nrhs<2 || !create_return_values(0))
2542  return false;
2543 
2544  int32_t len=0;
2545  char* normalization=get_string(len);
2546 
2547  float64_t c=0;
2548  float64_t r=0;
2549 
2550  if (m_nrhs>=3)
2551  c=get_real();
2552  if (m_nrhs>=4)
2553  r=get_real();
2554 
2555  bool success=ui_kernel->set_normalization(normalization, c, r);
2556 
2557  SG_FREE(normalization);
2558  return success;
2559 }
2560 
2561 bool CSGInterface::cmd_set_kernel()
2562 {
2563  if (m_nrhs<2 || !create_return_values(0))
2564  return false;
2565 
2566  SG_DEBUG("SGInterface: set_kernel\n")
2567  CKernel* kernel=create_kernel();
2568  return ui_kernel->set_kernel(kernel);
2569 }
2570 
2571 bool CSGInterface::cmd_add_kernel()
2572 {
2573  if (m_nrhs<3 || !create_return_values(0))
2574  return false;
2575 
2576  float64_t weight=get_real_from_real_or_str();
2577  // adjust m_nrhs to play well with checks in create_kernel
2578  m_nrhs--;
2579  CKernel* kernel=create_kernel();
2580 
2581  SG_DEBUG("SGInterface: add_kernel\n")
2582  return ui_kernel->add_kernel(kernel, weight);
2583 }
2584 
2585 bool CSGInterface::cmd_del_last_kernel()
2586 {
2587  if (m_nrhs<1 || !create_return_values(0))
2588  return false;
2589 
2590  return ui_kernel->del_last_kernel();
2591 }
2592 
2593 CKernel* CSGInterface::create_kernel()
2594 {
2595  CKernel* kernel=NULL;
2596  int32_t len=0;
2597  char* type=get_str_from_str_or_direct(len);
2598 
2599  SG_DEBUG("set_kernel with type: %s\n", type)
2600 
2601  if (strmatch(type, "COMBINED"))
2602  {
2603  if (m_nrhs<3)
2604  return NULL;
2605 
2606  int32_t size=get_int_from_int_or_str();
2607  bool append_subkernel_weights=false;
2608  if (m_nrhs>3)
2609  append_subkernel_weights=get_bool_from_bool_or_str();
2610 
2611  kernel=ui_kernel->create_combined(size, append_subkernel_weights);
2612  }
2613  else if (strmatch(type, "DISTANCE"))
2614  {
2615  if (m_nrhs<3)
2616  return NULL;
2617 
2618  int32_t size=get_int_from_int_or_str();
2619  float64_t width=1;
2620  if (m_nrhs>3)
2621  width=get_real_from_real_or_str();
2622 
2623  kernel=ui_kernel->create_distance(size, width);
2624  }
2625  else if (strmatch(type, "WAVELET"))
2626  {
2627 
2628  if (m_nrhs<4)
2629  return NULL;
2630 
2631  char* dtype=get_str_from_str_or_direct(len);
2632  if (strmatch(dtype, "REAL"))
2633  {
2634  int32_t size=get_int_from_int_or_str();
2635  float64_t Wdilation=5.0;
2636  float64_t Wtranslation=2.0;
2637 
2638  if (m_nrhs>4)
2639  {
2640  Wdilation=get_real_from_real_or_str();
2641 
2642  if (m_nrhs>5)
2643  Wtranslation=get_real_from_real_or_str();
2644  }
2645 
2646  kernel=ui_kernel->create_sigmoid(size, Wdilation, Wtranslation);
2647  }
2648 
2649  SG_FREE(dtype);
2650  }
2651  else if (strmatch(type, "LINEAR"))
2652  {
2653  if (m_nrhs<4)
2654  return NULL;
2655  if (m_nrhs>5)
2656  return NULL;
2657 
2658  char* dtype=get_str_from_str_or_direct(len);
2659  int32_t size=get_int_from_int_or_str();
2660  float64_t scale=-1;
2661  if (m_nrhs==5)
2662  scale=get_real_from_real_or_str();
2663 
2664  if (strmatch(dtype, "BYTE"))
2665  kernel=ui_kernel->create_linearbyte(size, scale);
2666  else if (strmatch(dtype, "WORD"))
2667  kernel=ui_kernel->create_linearword(size, scale);
2668  else if (strmatch(dtype, "CHAR"))
2669  kernel=ui_kernel->create_linearstring(size, scale);
2670  else if (strmatch(dtype, "REAL"))
2671  kernel=ui_kernel->create_linear(size, scale);
2672  else if (strmatch(dtype, "SPARSEREAL"))
2673  kernel=ui_kernel->create_sparselinear(size, scale);
2674 
2675  SG_FREE(dtype);
2676  }
2677  else if (strmatch(type, "HISTOGRAM"))
2678  {
2679  if (m_nrhs<4)
2680  return NULL;
2681 
2682  char* dtype=get_str_from_str_or_direct(len);
2683  if (strmatch(dtype, "WORD"))
2684  {
2685  int32_t size=get_int_from_int_or_str();
2686  kernel=ui_kernel->create_histogramword(size);
2687  }
2688 
2689  SG_FREE(dtype);
2690  }
2691  else if (strmatch(type, "SALZBERG"))
2692  {
2693  if (m_nrhs<4)
2694  return NULL;
2695 
2696  char* dtype=get_str_from_str_or_direct(len);
2697  if (strmatch(dtype, "WORD"))
2698  {
2699  int32_t size=get_int_from_int_or_str();
2700  kernel=ui_kernel->create_salzbergword(size);
2701  }
2702 
2703  SG_FREE(dtype);
2704  }
2705  else if (strmatch(type, "POLYMATCH"))
2706  {
2707  if (m_nrhs<4)
2708  return NULL;
2709 
2710  char* dtype=get_str_from_str_or_direct(len);
2711  int32_t size=get_int_from_int_or_str();
2712  int32_t degree=3;
2713  bool inhomogene=false;
2714  bool normalize=true;
2715 
2716  if (m_nrhs>4)
2717  {
2718  degree=get_int_from_int_or_str();
2719  if (m_nrhs>5)
2720  {
2721  inhomogene=get_bool_from_bool_or_str();
2722  if (m_nrhs>6)
2723  normalize=get_bool_from_bool_or_str();
2724  }
2725  }
2726 
2727  if (strmatch(dtype, "CHAR"))
2728  {
2729  kernel=ui_kernel->create_polymatchstring(
2730  size, degree, inhomogene, normalize);
2731  }
2732  else if (strmatch(dtype, "WORD"))
2733  {
2734  kernel=ui_kernel->create_polymatchwordstring(
2735  size, degree, inhomogene, normalize);
2736  }
2737 
2738  SG_FREE(dtype);
2739  }
2740  else if (strmatch(type, "MATCH"))
2741  {
2742  if (m_nrhs<4)
2743  return NULL;
2744 
2745  char* dtype=get_str_from_str_or_direct(len);
2746  if (strmatch(dtype, "WORD"))
2747  {
2748  int32_t size=get_int_from_int_or_str();
2749  int32_t d=3;
2750  bool normalize=true;
2751 
2752  if (m_nrhs>4)
2753  d=get_int_from_int_or_str();
2754  if (m_nrhs>5)
2755  normalize=get_bool_from_bool_or_str();
2756 
2757  kernel=ui_kernel->create_matchwordstring(size, d, normalize);
2758  }
2759 
2760  SG_FREE(dtype);
2761  }
2762  else if (strmatch(type, "WEIGHTEDCOMMSTRING") || strmatch(type, "COMMSTRING"))
2763  {
2764  char* dtype=get_str_from_str_or_direct(len);
2765  int32_t size=get_int_from_int_or_str();
2766  bool use_sign=false;
2767  char* norm_str=NULL;
2768 
2769  if (m_nrhs>4)
2770  {
2771  use_sign=get_bool_from_bool_or_str();
2772 
2773  if (m_nrhs>5)
2774  norm_str=get_str_from_str_or_direct(len);
2775  }
2776 
2777  if (strmatch(dtype, "WORD"))
2778  {
2779  if (strmatch(type, "WEIGHTEDCOMMSTRING"))
2780  {
2781  kernel=ui_kernel->create_commstring(
2782  size, use_sign, norm_str, K_WEIGHTEDCOMMWORDSTRING);
2783  }
2784  else if (strmatch(type, "COMMSTRING"))
2785  {
2786  kernel=ui_kernel->create_commstring(
2787  size, use_sign, norm_str, K_COMMWORDSTRING);
2788  }
2789  }
2790  else if (strmatch(dtype, "ULONG"))
2791  {
2792  kernel=ui_kernel->create_commstring(
2793  size, use_sign, norm_str, K_COMMULONGSTRING);
2794  }
2795 
2796  SG_FREE(dtype);
2797  SG_FREE(norm_str);
2798  }
2799  else if (strmatch(type, "CHI2"))
2800  {
2801  if (m_nrhs<4)
2802  return NULL;
2803 
2804  char* dtype=get_str_from_str_or_direct(len);
2805  if (strmatch(dtype, "REAL"))
2806  {
2807  int32_t size=get_int_from_int_or_str();
2808  float64_t width=1;
2809 
2810  if (m_nrhs>4)
2811  width=get_real_from_real_or_str();
2812 
2813  kernel=ui_kernel->create_chi2(size, width);
2814  }
2815 
2816  SG_FREE(dtype);
2817  }
2818  else if (strmatch(type, "FIXEDDEGREE"))
2819  {
2820  if (m_nrhs<4)
2821  return NULL;
2822 
2823  char* dtype=get_str_from_str_or_direct(len);
2824  if (strmatch(dtype, "CHAR"))
2825  {
2826  int32_t size=get_int_from_int_or_str();
2827  int32_t d=3;
2828  if (m_nrhs>4)
2829  d=get_int_from_int_or_str();
2830 
2831  kernel=ui_kernel->create_fixeddegreestring(size, d);
2832  }
2833 
2834  SG_FREE(dtype);
2835  }
2836  else if (strmatch(type, "LOCALALIGNMENT"))
2837  {
2838  if (m_nrhs<4)
2839  return NULL;
2840 
2841  char* dtype=get_str_from_str_or_direct(len);
2842  if (strmatch(dtype, "CHAR"))
2843  {
2844  int32_t size=get_int_from_int_or_str();
2845 
2846  kernel=ui_kernel->create_localalignmentstring(size);
2847  }
2848 
2849  SG_FREE(dtype);
2850  }
2851  else if (strmatch(type, "OLIGO"))
2852  {
2853  if (m_nrhs<6)
2854  return NULL;
2855 
2856  char* dtype=get_str_from_str_or_direct(len);
2857  if (strmatch(dtype, "CHAR"))
2858  {
2859  int32_t size=get_int_from_int_or_str();
2860  int32_t k=get_int_from_int_or_str();
2861  float64_t w=get_real_from_real_or_str();
2862 
2863  kernel=ui_kernel->create_oligo(size, k, w);
2864  }
2865 
2866  SG_FREE(dtype);
2867  }
2868  else if (strmatch(type, "WEIGHTEDDEGREEPOS2") ||
2869  strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2870  {
2871  if (m_nrhs<7)
2872  return NULL;
2873 
2874  char* dtype=get_str_from_str_or_direct(len);
2875  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2876  {
2877  int32_t size=get_int_from_int_or_str();
2878  int32_t order=get_int_from_int_or_str();
2879  int32_t max_mismatch=get_int_from_int_or_str();
2880  int32_t length=get_int_from_int_or_str();
2881  int32_t* shifts=NULL;
2882  int32_t l=0;
2883  get_vector_from_int_vector_or_str(shifts, l);
2884 
2885  ASSERT(l==length)
2886 
2887  bool use_normalization=true;
2888  if (strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2889  use_normalization=false;
2890 
2891  kernel=ui_kernel->create_weighteddegreepositionstring2(
2892  size, order, max_mismatch, shifts, length,
2893  use_normalization);
2894 
2895  SG_FREE(shifts);
2896  }
2897 
2898  SG_FREE(dtype);
2899  }
2900  else if (strmatch(type, "WEIGHTEDDEGREEPOS3"))
2901  {
2902  if (m_nrhs<7)
2903  return NULL;
2904 
2905  char* dtype=get_str_from_str_or_direct(len);
2906  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2907  {
2908  int32_t size=get_int_from_int_or_str();
2909  int32_t order=get_int_from_int_or_str();
2910  int32_t max_mismatch=get_int_from_int_or_str();
2911  int32_t length=get_int_from_int_or_str();
2912  int32_t mkl_stepsize=get_int_from_int_or_str();
2913  int32_t* shifts=NULL;
2914  int32_t l=0;
2915  get_vector_from_int_vector_or_str(shifts, l);
2916  ASSERT(l==length)
2917 
2918  float64_t* position_weights=NULL;
2919  if (m_nrhs>9+length)
2920  {
2921  get_vector_from_real_vector_or_str(
2922  position_weights, length);
2923  }
2924 
2925  kernel=ui_kernel->create_weighteddegreepositionstring3(
2926  size, order, max_mismatch, shifts, length,
2927  mkl_stepsize, position_weights);
2928 
2929  SG_FREE(position_weights);
2930  SG_FREE(shifts);
2931  }
2932 
2933  SG_FREE(dtype);
2934  }
2935  else if (strmatch(type, "WEIGHTEDDEGREEPOS"))
2936  {
2937  if (m_nrhs<4)
2938  return NULL;
2939 
2940  char* dtype=get_str_from_str_or_direct(len);
2941  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2942  {
2943  int32_t size=get_int_from_int_or_str();
2944  int32_t order=3;
2945  int32_t max_mismatch=0;
2946  int32_t length=0;
2947  int32_t center=0;
2948  float64_t step=1;
2949 
2950  if (m_nrhs>4)
2951  {
2952  order=get_int_from_int_or_str();
2953 
2954  if (m_nrhs>5)
2955  {
2956  max_mismatch=get_int_from_int_or_str();
2957 
2958  if (m_nrhs>6)
2959  {
2960  length=get_int_from_int_or_str();
2961 
2962  if (m_nrhs>7)
2963  {
2964  center=get_int_from_int_or_str();
2965 
2966  if (m_nrhs>8)
2967  step=get_real_from_real_or_str();
2968  }
2969  }
2970  }
2971  }
2972 
2973  kernel=ui_kernel->create_weighteddegreepositionstring(
2974  size, order, max_mismatch, length, center, step);
2975  }
2976 
2977  SG_FREE(dtype);
2978  }
2979  else if (strmatch(type, "WEIGHTEDDEGREE"))
2980  {
2981  if (m_nrhs<4)
2982  return NULL;
2983 
2984  char* dtype=get_str_from_str_or_direct(len);
2985  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2986  {
2987  int32_t size=get_int_from_int_or_str();
2988  int32_t order=3;
2989  int32_t max_mismatch=0;
2990  bool use_normalization=true;
2991  int32_t mkl_stepsize=1;
2992  bool block_computation=true;
2993  int32_t single_degree=-1;
2994 
2995  if (m_nrhs>4)
2996  {
2997  order=get_int_from_int_or_str();
2998 
2999  if (m_nrhs>5)
3000  {
3001  max_mismatch=get_int_from_int_or_str();
3002 
3003  if (m_nrhs>6)
3004  {
3005  use_normalization=get_bool_from_bool_or_str();
3006 
3007  if (m_nrhs>7)
3008  {
3009  mkl_stepsize=get_int_from_int_or_str();
3010 
3011  if (m_nrhs>8)
3012  {
3013  block_computation=get_int_from_int_or_str();
3014 
3015  if (m_nrhs>9)
3016  single_degree=get_int_from_int_or_str();
3017  }
3018  }
3019  }
3020  }
3021  }
3022 
3023  kernel=ui_kernel->create_weighteddegreestring(
3024  size, order, max_mismatch, use_normalization,
3025  mkl_stepsize, block_computation, single_degree);
3026  }
3027 
3028  SG_FREE(dtype);
3029  }
3030  else if (strmatch(type, "WEIGHTEDDEGREERBF"))
3031  {
3032  if (m_nrhs<5)
3033  return NULL;
3034 
3035  char* dtype=get_str_from_str_or_direct(len);
3036  int32_t size=get_int_from_int_or_str();
3037  int32_t nof_properties=get_int_from_int_or_str();
3038  int32_t degree=1;
3039  float64_t width=1;
3040  if (m_nrhs>5)
3041  {
3042  degree=get_int_from_int_or_str();
3043  if (m_nrhs>6)
3044  {
3045  width=get_real_from_real_or_str();
3046  }
3047 
3048  }
3049  //if (strmatch(dtype, "REAL"))
3050 
3051  kernel=ui_kernel->create_weighteddegreerbf(size, degree, nof_properties, width);
3052 
3053  SG_FREE(dtype);
3054 
3055  }
3056  else if (strmatch(type, "SPECTRUMMISMATCHRBF"))
3057  {
3058  if (m_nrhs<7)
3059  return NULL;
3060 
3061  char* dtype=get_str_from_str_or_direct(len);
3062  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
3063  {
3064  int32_t size=get_int_from_int_or_str();
3065  int32_t degree=get_int_from_int_or_str();
3066  int32_t max_mismatch=get_int_from_int_or_str();
3067  float64_t width=get_real_from_real_or_str();
3068  float64_t* AA_matrix = NULL;
3069 
3070  //int32_t length=128*128;
3071  //get_vector_from_real_vector_or_str(AA_matrix, length);
3072  float64_t* helper_matrix=NULL;
3073  int32_t N=0;
3074  int32_t M=0;
3075  get_matrix(helper_matrix, N, M);
3076 
3077  if (N == 128 && M == 128)
3078  {
3079  AA_matrix=SG_MALLOC(float64_t, N*M);
3080  memcpy(AA_matrix, helper_matrix, N*M*sizeof(float64_t)) ;
3081  kernel=ui_kernel->create_spectrummismatchrbf(size, AA_matrix, 128, 128, max_mismatch, degree, width);
3082  }
3083  else
3084  {
3085  SG_ERROR("Matrix size %d %d\n", N, M)
3086  }
3087  }
3088  SG_FREE(dtype);
3089 
3090  }
3091 
3092  else if (strmatch(type, "SLIK") || strmatch(type, "LIK"))
3093  {
3094  if (m_nrhs<4)
3095  return NULL;
3096 
3097  char* dtype=get_str_from_str_or_direct(len);
3098  if (strmatch(dtype, "CHAR"))
3099  {
3100  int32_t size=get_int_from_int_or_str();
3101  int32_t length=3;
3102  int32_t inner_degree=3;
3103  int32_t outer_degree=1;
3104 
3105  if (m_nrhs>4)
3106  {
3107  length=get_int_from_int_or_str();
3108 
3109  if (m_nrhs>5)
3110  {
3111  inner_degree=get_int_from_int_or_str();
3112 
3113  if (m_nrhs>6)
3114  outer_degree=get_int_from_int_or_str();
3115  }
3116  }
3117 
3118  if (strmatch(type, "SLIK"))
3119  {
3120  kernel=ui_kernel->create_localityimprovedstring(
3121  size, length, inner_degree, outer_degree,
3123  }
3124  else
3125  {
3126  kernel=ui_kernel->create_localityimprovedstring(
3127  size, length, inner_degree, outer_degree,
3129  }
3130  }
3131 
3132  SG_FREE(dtype);
3133  }
3134  else if (strmatch(type, "POLY"))
3135  {
3136  if (m_nrhs<4)
3137  return NULL;
3138 
3139  char* dtype=get_str_from_str_or_direct(len);
3140  int32_t size=get_int_from_int_or_str();
3141  int32_t degree=2;
3142  bool inhomogene=false;
3143  bool normalize=true;
3144 
3145  if (m_nrhs>4)
3146  {
3147  degree=get_int_from_int_or_str();
3148 
3149  if (m_nrhs>5)
3150  {
3151  inhomogene=get_bool_from_bool_or_str();
3152 
3153  if (m_nrhs>6)
3154  normalize=get_bool_from_bool_or_str();
3155  }
3156  }
3157 
3158  if (strmatch(dtype, "REAL"))
3159  {
3160  kernel=ui_kernel->create_poly(
3161  size, degree, inhomogene, normalize);
3162  }
3163  else if (strmatch(dtype, "SPARSEREAL"))
3164  {
3165  kernel=ui_kernel->create_sparsepoly(
3166  size, degree, inhomogene, normalize);
3167  }
3168 
3169  SG_FREE(dtype);
3170  }
3171  else if (strmatch(type, "SIGMOID"))
3172  {
3173  if (m_nrhs<4)
3174  return NULL;
3175 
3176  char* dtype=get_str_from_str_or_direct(len);
3177  if (strmatch(dtype, "REAL"))
3178  {
3179  int32_t size=get_int_from_int_or_str();
3180  float64_t gamma=0.01;
3181  float64_t coef0=0;
3182 
3183  if (m_nrhs>4)
3184  {
3185  gamma=get_real_from_real_or_str();
3186 
3187  if (m_nrhs>5)
3188  coef0=get_real_from_real_or_str();
3189  }
3190 
3191  kernel=ui_kernel->create_sigmoid(size, gamma, coef0);
3192  }
3193 
3194  SG_FREE(dtype);
3195  }
3196  else if (strmatch(type, "GAUSSIAN")) // RBF
3197  {
3198  if (m_nrhs<4)
3199  return NULL;
3200 
3201  char* dtype=get_str_from_str_or_direct(len);
3202  int32_t size=get_int_from_int_or_str();
3203  float64_t width=1;
3204  if (m_nrhs>4)
3205  width=get_real_from_real_or_str();
3206 
3207  if (strmatch(dtype, "REAL"))
3208  kernel=ui_kernel->create_gaussian(size, width);
3209  else if (strmatch(dtype, "SPARSEREAL"))
3210  kernel=ui_kernel->create_sparsegaussian(size, width);
3211 
3212  SG_FREE(dtype);
3213  }
3214  else if (strmatch(type, "GAUSSIANSHIFT")) // RBF
3215  {
3216  if (m_nrhs<7)
3217  return NULL;
3218 
3219  char* dtype=get_str_from_str_or_direct(len);
3220  if (strmatch(dtype, "REAL"))
3221  {
3222  int32_t size=get_int_from_int_or_str();
3223  float64_t width=get_real_from_real_or_str();
3224  int32_t max_shift=get_int_from_int_or_str();
3225  int32_t shift_step=get_int_from_int_or_str();
3226 
3227  kernel=ui_kernel->create_gaussianshift(
3228  size, width, max_shift, shift_step);
3229  }
3230 
3231  SG_FREE(dtype);
3232  }
3233  else if (strmatch(type, "CUSTOM"))
3234  {
3235  if (m_nrhs!=4 || !create_return_values(0))
3236  return NULL;
3237 
3238  float64_t* kmatrix=NULL;
3239  int32_t num_feat=0;
3240  int32_t num_vec=0;
3241  get_matrix(kmatrix, num_feat, num_vec);
3242 
3243  int32_t tlen=0;
3244  char* ktype=get_string(tlen);
3245 
3246  if (!strmatch(ktype, "DIAG") &&
3247  !strmatch(ktype, "FULL") &&
3248  !strmatch(ktype, "FULL2DIAG"))
3249  {
3250  SG_FREE(ktype);
3251  SG_ERROR("Undefined type, not DIAG, FULL or FULL2DIAG.\n")
3252  }
3253 
3254  bool source_is_diag=false;
3255  bool dest_is_diag=false;
3256 
3257  if (strmatch(ktype, "FULL2DIAG"))
3258  dest_is_diag=true;
3259  else if (strmatch(ktype, "DIAG"))
3260  {
3261  source_is_diag=true;
3262  dest_is_diag=true;
3263  }
3264 
3265  kernel=ui_kernel->create_custom(kmatrix, num_feat, num_vec,
3266  source_is_diag, dest_is_diag);
3267  }
3268  else if (strmatch(type, "CONST"))
3269  {
3270  if (m_nrhs<4)
3271  return NULL;
3272 
3273  char* dtype=get_str_from_str_or_direct(len);
3274  if (strmatch(dtype, "REAL"))
3275  {
3276  int32_t size=get_int_from_int_or_str();
3277  float64_t c=1;
3278  if (m_nrhs>4)
3279  c=get_real_from_real_or_str();
3280 
3281  kernel=ui_kernel->create_const(size, c);
3282  }
3283 
3284  SG_FREE(dtype);
3285  }
3286  else if (strmatch(type, "DIAG"))
3287  {
3288  if (m_nrhs<4)
3289  return NULL;
3290 
3291  char* dtype=get_str_from_str_or_direct(len);
3292  if (strmatch(dtype, "REAL"))
3293  {
3294  int32_t size=get_int_from_int_or_str();
3295  float64_t diag=1;
3296  if (m_nrhs>4)
3297  diag=get_real_from_real_or_str();
3298 
3299  kernel=ui_kernel->create_diag(size, diag);
3300  }
3301 
3302  SG_FREE(dtype);
3303  }
3304 
3305  else if (strmatch(type, "TPPK"))
3306  {
3307  if (m_nrhs!=5)
3308  return NULL;
3309 
3310  char* dtype=get_str_from_str_or_direct(len);
3311  if (strmatch(dtype, "INT"))
3312  {
3313  int32_t size=get_int_from_int_or_str();
3314  float64_t* km=NULL;
3315  int32_t rows=0;
3316  int32_t cols=0;
3317  get_matrix(km, rows, cols);
3318  kernel=ui_kernel->create_tppk(size, km, rows, cols);
3319  }
3320 
3321  SG_FREE(dtype);
3322  }
3323  else
3325 
3326  SG_FREE(type);
3327  SG_DEBUG("created kernel: %p\n", kernel)
3328  return kernel;
3329 }
3330 
3331 
3332 CFeatures* CSGInterface::create_custom_string_features(CStringFeatures<uint8_t>* orig_feat)
3333 {
3334  CFeatures* feat=orig_feat;
3335 
3336  if (m_nrhs>4)
3337  {
3338  int32_t start=-1;
3339  int32_t order=0;
3340  int32_t from_order=0;
3341  bool normalize=true;
3342 
3343  int32_t feature_class_len=0;
3344  char* feature_class_str=get_string(feature_class_len);
3345  ASSERT(feature_class_str)
3346  CAlphabet* alphabet=NULL;
3347  if (strmatch(feature_class_str, "WD"))
3348  {
3349  if (m_nrhs!=7)
3350  SG_ERROR("Please specify alphabet, WD, order, from_order\n")
3351 
3352  alphabet=new CAlphabet(RAWDNA);
3353  order=get_int();
3354  from_order=get_int();
3355  feat = new CWDFeatures((CStringFeatures<uint8_t>*) feat, order, from_order);
3356  }
3357  else if (strmatch(feature_class_str, "WSPEC"))
3358  {
3359  if (m_nrhs!=8)
3360  SG_ERROR("Please specify alphabet, order, WSPEC, start, normalize\n")
3361 
3362  alphabet=new CAlphabet(RAWDNA);
3363  order=get_int();
3364  start=get_int();
3365  normalize=get_bool();
3367  sf->obtain_from_char_features((CStringFeatures<uint8_t>*) feat, start, order, 0, normalize);
3368  sf->add_preprocessor(new CSortWordString());
3369  sf->apply_preprocessor();
3370  SG_UNREF(feat);
3371  feat = new CImplicitWeightedSpecFeatures(sf, normalize);
3372  }
3373  SG_FREE(feature_class_str);
3374 
3375  SG_UNREF(alphabet);
3376  }
3377 
3378  return feat;
3379 }
3380 
3381 CFeatures* CSGInterface::create_custom_real_features(CDenseFeatures<float64_t>* orig_feat)
3382 {
3383  CFeatures* feat=orig_feat;
3384 
3385  if (m_nrhs==6)
3386  {
3387  int32_t degree=0;
3388  int32_t feature_class_len=0;
3389  bool normalize;
3390  char* feature_class_str=get_string(feature_class_len);
3391  ASSERT(feature_class_str)
3392  if (strmatch(feature_class_str, "POLY"))
3393  {
3394  //if (m_nrhs!=7)
3395  // SG_ERROR("Please specify POLY, degree\n")
3396 
3397  degree=get_int();
3398  normalize = get_bool();
3399  feat = new CPolyFeatures((CDenseFeatures<float64_t>*) feat, degree, normalize);
3400 
3401  }
3402  else
3403  SG_ERROR("Unknown feature class: %s\n", feature_class_str)
3404 
3405  SG_FREE(feature_class_str);
3406  }
3407 
3408  return feat;
3409 }
3410 
3411 bool CSGInterface::cmd_init_kernel()
3412 {
3414  return true;
3415 }
3416 
3417 bool CSGInterface::cmd_clean_kernel()
3418 {
3419  if (m_nrhs<1 || !create_return_values(0))
3420  return false;
3421 
3422  return ui_kernel->clean_kernel();
3423 }
3424 
3425 bool CSGInterface::cmd_save_kernel()
3426 {
3427  if (m_nrhs<2 || !create_return_values(0))
3428  return false;
3429 
3430  int32_t len=0;
3431  char* filename=get_str_from_str_or_direct(len);
3432 
3433  bool success=ui_kernel->save_kernel(filename);
3434 
3435  SG_FREE(filename);
3436  return success;
3437 }
3438 
3439 bool CSGInterface::cmd_get_kernel_matrix()
3440 {
3441  if (m_nrhs>2 || !create_return_values(1))
3442  return false;
3443 
3444  int32_t len=0;
3445  char* target=NULL;
3446 
3447  if (m_nrhs==2)
3448  target=get_string(len);
3449  bool success=ui_kernel->init_kernel(target);
3450 
3451  if (success)
3452  {
3453  CKernel* kernel=ui_kernel->get_kernel();
3454  if (!kernel || !kernel->has_features())
3455  SG_ERROR("No kernel defined or not initialized.\n")
3456 
3458  set_matrix(km.matrix, km.num_rows, km.num_cols);
3459  }
3460 
3461  SG_FREE(target);
3462 
3463  return success;
3464 }
3465 
3466 bool CSGInterface::cmd_set_WD_position_weights()
3467 {
3468  if (m_nrhs<2 || m_nrhs>3 || !create_return_values(0))
3469  return false;
3470 
3471  CKernel* kernel=ui_kernel->get_kernel();
3472  if (!kernel)
3473  SG_ERROR("No kernel.\n")
3474 
3475  if (kernel->get_kernel_type()==K_COMBINED)
3476  {
3477  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3478  if (!kernel)
3479  SG_ERROR("No last kernel.\n")
3480 
3481  EKernelType ktype=kernel->get_kernel_type();
3482  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3483  SG_ERROR("Unsupported kernel.\n")
3484  }
3485 
3486  bool success=false;
3487  float64_t* weights=NULL;
3488  int32_t dim=0;
3489  int32_t len=0;
3490  get_matrix(weights, dim, len);
3491 
3492  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3493  {
3495  (CWeightedDegreeStringKernel*) kernel;
3496 
3497  if (dim!=1 && len>0)
3498  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n")
3499 
3500  ui_kernel->init_kernel("TRAIN");
3501  success=k->set_position_weights(weights, len);
3502  }
3503  else
3504  {
3507  char* target=NULL;
3508  bool is_train=true;
3509 
3510  if (m_nrhs==3)
3511  {
3512  int32_t tlen=0;
3513  target=get_string(tlen);
3514  if (!target)
3515  {
3516  SG_FREE(weights);
3517  SG_ERROR("Couldn't find second argument to method.\n")
3518  }
3519 
3520  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
3521  {
3522  SG_FREE(target);
3523  SG_ERROR("Second argument none of TRAIN or TEST.\n")
3524  }
3525 
3526  if (strmatch(target, "TEST"))
3527  is_train=false;
3528  }
3529 
3530  if (dim!=1 && len>0)
3531  {
3532  SG_FREE(target);
3533  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n")
3534  }
3535 
3536  if (dim==0 && len==0)
3537  {
3538  if (create_return_values(3))
3539  {
3540  if (is_train)
3541  success=k->delete_position_weights_lhs();
3542  else
3543  success=k->delete_position_weights_rhs();
3544  }
3545  else
3546  success=k->delete_position_weights();
3547  }
3548  else
3549  {
3550  if (create_return_values(3))
3551  {
3552  if (is_train)
3553  success=k->set_position_weights_lhs(weights, dim, len);
3554  else
3555  success=k->set_position_weights_rhs(weights, dim, len);
3556  }
3557  else
3558  {
3559  ui_kernel->init_kernel("TRAIN");
3560  k->set_position_weights(SGVector<float64_t>(weights, len));
3561  success=true;
3562  }
3563  }
3564 
3565  SG_FREE(target);
3566  }
3567 
3568  return success;
3569 }
3570 
3571 bool CSGInterface::cmd_get_subkernel_weights()
3572 {
3573  if (m_nrhs!=1 || !create_return_values(1))
3574  return false;
3575 
3576  CKernel *kernel=ui_kernel->get_kernel();
3577  if (!kernel)
3578  SG_ERROR("Invalid kernel.\n")
3579 
3580  EKernelType ktype=kernel->get_kernel_type();
3581  const float64_t* weights=NULL;
3582 
3583  if (ktype==K_COMBINED)
3584  {
3585  int32_t num_weights=-1;
3586  weights=((CCombinedKernel *) kernel)->get_subkernel_weights(num_weights);
3587 
3588  // matrices of shape 1 x num_weight are returned
3589  set_matrix(weights, 1, num_weights);
3590  return true;
3591  }
3592 
3593  int32_t degree=-1;
3594  int32_t length=-1;
3595 
3596  if (ktype==K_WEIGHTEDDEGREE)
3597  {
3598  weights=((CWeightedDegreeStringKernel *) kernel)->
3599  get_degree_weights(degree, length);
3600  }
3601  else if (ktype==K_WEIGHTEDDEGREEPOS)
3602  {
3603  weights=((CWeightedDegreePositionStringKernel *) kernel)->
3604  get_degree_weights(degree, length);
3605  }
3606  else
3607  SG_ERROR("Setting subkernel weights not supported on this kernel.\n")
3608 
3609  if (length==0)
3610  length=1;
3611 
3612  set_matrix(weights, degree, length);
3613  return true;
3614 }
3615 
3616 bool CSGInterface::cmd_set_subkernel_weights()
3617 {
3618  if (m_nrhs!=2 || !create_return_values(0))
3619  return false;
3620 
3621  CKernel* kernel=ui_kernel->get_kernel();
3622  if (!kernel)
3623  SG_ERROR("No kernel.\n")
3624 
3625  bool success=false;
3626  float64_t* weights=NULL;
3627  int32_t dim=0;
3628  int32_t len=0;
3629  get_matrix(weights, dim, len);
3630 
3631  EKernelType ktype=kernel->get_kernel_type();
3632  if (ktype==K_WEIGHTEDDEGREE)
3633  {
3635  (CWeightedDegreeStringKernel*) kernel;
3636  int32_t degree=k->get_degree();
3637  if (dim!=degree || len<1)
3638  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)
3639 
3640  if (len==1)
3641  len=0;
3642 
3643  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3644  }
3645  else if (ktype==K_WEIGHTEDDEGREEPOS)
3646  {
3649  int32_t degree=k->get_degree();
3650  if (dim!=degree || len<1)
3651  SG_ERROR("WDPos: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree)
3652 
3653  if (len==1)
3654  len=0;
3655 
3656  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3657  }
3658  else // all other kernels
3659  {
3660  int32_t num_subkernels=kernel->get_num_subkernels();
3661  if (dim!=1 || len!=num_subkernels)
3662  SG_ERROR("All: Dimension mismatch (should be 1 x num_subkernels)\n")
3663 
3664  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3665  success=true;
3666  }
3667 
3668  return success;
3669 }
3670 
3671 bool CSGInterface::cmd_set_subkernel_weights_combined()
3672 {
3673  if (m_nrhs!=3 || !create_return_values(0))
3674  return false;
3675 
3676  CKernel* kernel=ui_kernel->get_kernel();
3677  if (!kernel)
3678  SG_ERROR("No kernel.\n")
3679  if (kernel->get_kernel_type()!=K_COMBINED)
3680  SG_ERROR("Only works for combined kernels.\n")
3681 
3682  bool success=false;
3683  float64_t* weights=NULL;
3684  int32_t dim=0;
3685  int32_t len=0;
3686  get_matrix(weights, dim, len);
3687 
3688  int32_t idx=get_int();
3689  SG_DEBUG("using kernel_idx=%i\n", idx)
3690 
3691  kernel=((CCombinedKernel*) kernel)->get_kernel(idx);
3692  if (!kernel)
3693  SG_ERROR("No subkernel at idx %d.\n", idx)
3694 
3695  EKernelType ktype=kernel->get_kernel_type();
3696  if (ktype==K_WEIGHTEDDEGREE)
3697  {
3699  (CWeightedDegreeStringKernel*) kernel;
3700  int32_t degree=k->get_degree();
3701  if (dim!=degree || len<1)
3702  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3703 
3704  if (len==1)
3705  len=0;
3706 
3707  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3708  }
3709  else if (ktype==K_WEIGHTEDDEGREEPOS)
3710  {
3713  int32_t degree=k->get_degree();
3714  if (dim!=degree || len<1)
3715  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3716 
3717  if (len==1)
3718  len=0;
3719 
3720  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3721  }
3722  else // all other kernels
3723  {
3724  int32_t num_subkernels=kernel->get_num_subkernels();
3725  if (dim!=1 || len!=num_subkernels)
3726  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n")
3727 
3728  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3729  success=true;
3730  }
3731 
3732  return success;
3733 }
3734 
3735 bool CSGInterface::cmd_get_dotfeature_weights_combined()
3736 {
3737  if (m_nrhs!=2 || !create_return_values(1))
3738  return false;
3739 
3740  int32_t tlen=0;
3741  char* target=get_string(tlen);
3742  CFeatures* features=NULL;
3743 
3744  if (strmatch(target, "TRAIN"))
3745  features=ui_features->get_train_features();
3746  else if (strmatch(target, "TEST"))
3747  features=ui_features->get_test_features();
3748  else
3749  {
3750  SG_FREE(target);
3751  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
3752  }
3753  SG_FREE(target);
3754 
3755  if (!features)
3756  SG_ERROR("No features.\n")
3757  if (features->get_feature_class()!=C_COMBINED_DOT)
3758  SG_ERROR("Only works for combined dot features.\n")
3759 
3760  SGVector<float64_t> weights = ((CCombinedDotFeatures*) features)->get_subfeature_weights();
3761  set_vector(weights.vector, weights.vlen);
3762 
3763  return true;
3764 }
3765 
3766 bool CSGInterface::cmd_set_dotfeature_weights_combined()
3767 {
3768  if (m_nrhs!=3 || !create_return_values(0))
3769  return false;
3770 
3771  int32_t tlen=0;
3772  char* target=get_string(tlen);
3773  CFeatures* features=NULL;
3774 
3775  if (strmatch(target, "TRAIN"))
3776  features=ui_features->get_train_features();
3777  else if (strmatch(target, "TEST"))
3778  features=ui_features->get_test_features();
3779  else
3780  {
3781  SG_FREE(target);
3782  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n")
3783  }
3784  SG_FREE(target);
3785 
3786  if (!features)
3787  SG_ERROR("No features.\n")
3788  if (features->get_feature_class()!=C_COMBINED_DOT)
3789  SG_ERROR("Only works for combined dot features.\n")
3790 
3791  float64_t* weights=NULL;
3792  int32_t dim=0;
3793  int32_t len=0;
3794  get_matrix(weights, dim, len);
3795 
3796  ((CCombinedDotFeatures*) features)->set_subfeature_weights(SGVector<float64_t>(weights, len));
3797 
3798  return true;
3799 }
3800 
3801 bool CSGInterface::cmd_set_last_subkernel_weights()
3802 {
3803  if (m_nrhs!=2 || !create_return_values(0))
3804  return false;
3805 
3806  CKernel* kernel=ui_kernel->get_kernel();
3807  if (!kernel)
3808  SG_ERROR("No kernel.\n")
3809  if (kernel->get_kernel_type()!=K_COMBINED)
3810  SG_ERROR("Only works for Combined kernels.\n")
3811 
3812  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3813  if (!kernel)
3814  SG_ERROR("No last kernel.\n")
3815 
3816  bool success=false;
3817  float64_t* weights=NULL;
3818  int32_t dim=0;
3819  int32_t len=0;
3820  get_matrix(weights, dim, len);
3821 
3822  EKernelType ktype=kernel->get_kernel_type();
3823  if (ktype==K_WEIGHTEDDEGREE)
3824  {
3826  if (dim!=k->get_degree() || len<1)
3827  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3828 
3829  if (len==1)
3830  len=0;
3831 
3832  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3833  }
3834  else if (ktype==K_WEIGHTEDDEGREEPOS)
3835  {
3838  if (dim!=k->get_degree() || len<1)
3839  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n")
3840 
3841  if (len==1)
3842  len=0;
3843 
3844  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3845  }
3846  else // all other kernels
3847  {
3848  int32_t num_subkernels=kernel->get_num_subkernels();
3849  if (dim!=1 || len!=num_subkernels)
3850  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n")
3851 
3852  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3853  success=true;
3854  }
3855 
3856  return success;
3857 }
3858 
3859 bool CSGInterface::cmd_get_WD_position_weights()
3860 {
3861  if (m_nrhs!=1 || !create_return_values(1))
3862  return false;
3863 
3864  CKernel* kernel=ui_kernel->get_kernel();
3865  if (!kernel)
3866  SG_ERROR("No kernel.\n")
3867 
3868  if (kernel->get_kernel_type()==K_COMBINED)
3869  {
3870  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3871  if (!kernel)
3872  SG_ERROR("Couldn't find last kernel.\n")
3873 
3874  EKernelType ktype=kernel->get_kernel_type();
3875  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3876  SG_ERROR("Wrong subkernel type.\n")
3877  }
3878 
3879  int32_t len=0;
3880  const float64_t* position_weights;
3881 
3882  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3883  position_weights=((CWeightedDegreeStringKernel*) kernel)->get_position_weights(len);
3884  else
3885  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->get_position_weights(len);
3886 
3887  if (position_weights==NULL)
3888  set_vector(position_weights, 0);
3889  else
3890  set_vector(position_weights, len);
3891 
3892  return true;
3893 }
3894 
3895 bool CSGInterface::cmd_get_last_subkernel_weights()
3896 {
3897  if (m_nrhs!=1 || !create_return_values(1))
3898  return false;
3899 
3900  CKernel* kernel=ui_kernel->get_kernel();
3901  EKernelType ktype=kernel->get_kernel_type();
3902  if (!kernel)
3903  SG_ERROR("No kernel.\n")
3904  if (ktype!=K_COMBINED)
3905  SG_ERROR("Only works for Combined kernels.\n")
3906 
3907  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3908  if (!kernel)
3909  SG_ERROR("Couldn't find last kernel.\n")
3910 
3911  int32_t degree=0;
3912  int32_t len=0;
3913 
3914  if (ktype==K_COMBINED)
3915  {
3916  int32_t num_weights=0;
3917  const float64_t* weights=
3918  ((CCombinedKernel*) kernel)->get_subkernel_weights(num_weights);
3919 
3920  set_vector(weights, num_weights);
3921  return true;
3922  }
3923 
3924  float64_t* weights=NULL;
3925  if (ktype==K_WEIGHTEDDEGREE)
3926  weights=((CWeightedDegreeStringKernel*) kernel)->
3927  get_degree_weights(degree, len);
3928  else if (ktype==K_WEIGHTEDDEGREEPOS)
3929  weights=((CWeightedDegreePositionStringKernel*) kernel)->
3930  get_degree_weights(degree, len);
3931  else
3932  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n")
3933 
3934  if (len==0)
3935  len=1;
3936 
3937  set_matrix(weights, degree, len);
3938 
3939  return true;
3940 }
3941 
/** Compute per-subkernel outputs of a weighted degree (position) kernel.
 *
 * For every right-hand-side vector compute_by_tree() fills degree*len values
 * (len is treated as 1 when it is reported as 0); the resulting
 * (degree*len) x num_vec buffer is handed back via set_matrix().
 * Requires exactly one argument and one return value.
 *
 * NOTE(review): this listing has gaps (embedded line numbers 3961, 3968-3969,
 * 3989 and 3995-3996 are absent), so the declarations of `k` are not visible
 * here - restore them from the original file before building.
 */
3942 bool CSGInterface::cmd_compute_by_subkernels()
3943 {
3944  if (m_nrhs!=1 || !create_return_values(1))
3945  return false;
3946 
3947  CKernel* kernel=ui_kernel->get_kernel();
3948  if (!kernel)
3949  SG_ERROR("No kernel.\n")
3950  if (!kernel->get_rhs())
3951  SG_ERROR("No rhs.\n")
3952 
3953  int32_t num_vec=kernel->get_rhs()->get_num_vectors();
3954  int32_t degree=0;
3955  int32_t len=0;
3956  EKernelType ktype=kernel->get_kernel_type();
3957 
3958  // it would be nice to have a common base class for the WD kernels
3959  if (ktype==K_WEIGHTEDDEGREE)
3960  {
// NOTE(review): declaration of `k` (listing line 3961) is missing here
3962  k->get_degree_weights(degree, len);
3963  if (!k->is_tree_initialized())
3964  SG_ERROR("Kernel optimization not initialized.\n")
3965  }
3966  else if (ktype==K_WEIGHTEDDEGREEPOS)
3967  {
// NOTE(review): declaration of `k` (listing lines 3968-3969) is missing here
3970  k->get_degree_weights(degree, len);
3971  if (!k->is_tree_initialized())
3972  SG_ERROR("Kernel optimization not initialized.\n")
3973  }
3974  else
3975  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n")
3976 
// a reported length of 0 is treated as one position
3977  if (len==0)
3978  len=1;
3979 
// one block of num_feat values per rhs vector, zero-initialised below
3980  int32_t num_feat=degree*len;
3981  int32_t num=num_feat*num_vec;
3982  float64_t* result=SG_MALLOC(float64_t, num);
3983 
3984  for (int32_t i=0; i<num; i++)
3985  result[i]=0;
3986 
3987  if (ktype==K_WEIGHTEDDEGREE)
3988  {
// NOTE(review): declaration of `k` (listing line 3989) is missing here
3990  for (int32_t i=0; i<num_vec; i++)
3991  k->compute_by_tree(i, &result[i*num_feat]);
3992  }
3993  else
3994  {
// NOTE(review): declaration of `k` (listing lines 3995-3996) is missing here
3997  for (int32_t i=0; i<num_vec; i++)
3998  k->compute_by_tree(i, &result[i*num_feat]);
3999  }
4000 
4001  set_matrix(result, num_feat, num_vec);
4002  SG_FREE(result);
4003 
4004  return true;
4005 }
4006 
4007 bool CSGInterface::cmd_init_kernel_optimization()
4008 {
4009  if (m_nrhs<1 || !create_return_values(0))
4010  return false;
4011 
4012  return ui_kernel->init_kernel_optimization();
4013 }
4014 
/** Return the optimization data of the current kernel.
 *
 * What is returned depends on the kernel type:
 *  - K_WEIGHTEDDEGREEPOS: a num_sym x num_feat matrix obtained from
 *    extract_w(); needs a second argument max_order, which is reset to 1 (with
 *    a warning) when outside 1..12, and an SVM to take support vectors from
 *  - K_COMMWORDSTRING: the dictionary as a vector
 *  - K_LINEAR: the vector returned by get_w()
 * Any other kernel type raises an error.
 *
 * NOTE(review): this listing has gaps (embedded line numbers 4037-4038, 4067
 * and 4069 are absent): the declarations of `k` for the first two cases and
 * whatever stood between the K_COMMWORDSTRING label and its block are not
 * visible here.
 */
4015 bool CSGInterface::cmd_get_kernel_optimization()
4016 {
4017  if (m_nrhs<1 || !create_return_values(1))
4018  return false;
4019 
4020  CKernel* kernel=ui_kernel->get_kernel();
4021  if (!kernel)
4022  SG_ERROR("No kernel defined.\n")
4023 
4024  switch (kernel->get_kernel_type())
4025  {
4026  case K_WEIGHTEDDEGREEPOS:
4027  {
4028  if (m_nrhs!=2)
4029  SG_ERROR("parameter missing\n")
4030 
4031  int32_t max_order=get_int();
4032  if ((max_order<1) || (max_order>12))
4033  {
4034  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order)
4035  max_order=1;
4036  }
4037 
// NOTE(review): declaration of `k` (listing line 4038) is missing here
4039  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4040  if (!svm)
4041  SG_ERROR("No SVM defined.\n")
4042 
// copy support vector indices and alphas into plain arrays for extract_w()
4043  int32_t num_suppvec=svm->get_num_support_vectors();
4044  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4045  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4046  int32_t num_feat=0;
4047  int32_t num_sym=0;
4048 
4049  for (int32_t i=0; i<num_suppvec; i++)
4050  {
4051  sv_idx[i]=svm->get_support_vector(i);
4052  sv_weight[i]=svm->get_alpha(i);
4053  }
4054 
4055  float64_t* position_weights=k->extract_w(max_order, num_feat,
4056  num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4057  SG_FREE(sv_idx);
4058  SG_FREE(sv_weight);
4059 
4060  set_matrix(position_weights, num_sym, num_feat);
4061  SG_FREE(position_weights);
4062 
4063  return true;
4064  }
4065 
4066  case K_COMMWORDSTRING:
// NOTE(review): listing lines 4067 and 4069 (incl. the declaration of `k`) are missing here
4068  {
4070  int32_t len=0;
4071  float64_t* weights;
4072  k->get_dictionary(len, weights);
4073 
4074  set_vector(weights, len);
4075  return true;
4076  }
4077  case K_LINEAR:
4078  {
4079  CLinearKernel* k=(CLinearKernel*) kernel;
4080  SGVector<float64_t> weights=k->get_w();
4081 
4082  set_vector(weights.vector, weights.size());
4083  return true;
4084  }
4085  default:
4086  SG_ERROR("Unsupported kernel %s.\n", kernel->get_name())
4087  }
4088 
4089  return true;
4090 }
4091 
4092 bool CSGInterface::cmd_delete_kernel_optimization()
4093 {
4094  if (m_nrhs<1 || !create_return_values(0))
4095  return false;
4096 
4097  return ui_kernel->delete_kernel_optimization();
4098 }
4099 
4100 bool CSGInterface::cmd_use_diagonal_speedup()
4101 {
4102  if (m_nrhs<2 || !create_return_values(0))
4103  return false;
4104 
4105  bool speedup=get_bool();
4106 
4107  CKernel* kernel=ui_kernel->get_kernel();
4108  if (!kernel)
4109  SG_ERROR("No kernel defined.\n")
4110 
4111  if (kernel->get_kernel_type()==K_COMBINED)
4112  {
4113  SG_DEBUG("Identified combined kernel.\n")
4114  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
4115  if (!kernel)
4116  SG_ERROR("No last kernel defined.\n")
4117  }
4118 
4119  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4120  SG_ERROR("Currently only commwordstring kernel supports diagonal speedup\n")
4121 
4122  ((CCommWordStringKernel*) kernel)->set_use_dict_diagonal_optimization(speedup);
4123 
4124  SG_INFO("Diagonal speedup %s.\n", speedup ? "enabled" : "disabled")
4125 
4126  return true;
4127 }
4128 
4129 bool CSGInterface::cmd_set_kernel_optimization_type()
4130 {
4131  if (m_nrhs<2 || !create_return_values(0))
4132  return false;
4133 
4134  int32_t len=0;
4135  char* opt_type=get_str_from_str_or_direct(len);
4136 
4137  bool success=ui_kernel->set_optimization_type(opt_type);
4138 
4139  SG_FREE(opt_type);
4140  return success;
4141 }
4142 
4143 bool CSGInterface::cmd_set_solver()
4144 {
4145  if (m_nrhs<2 || !create_return_values(0))
4146  return false;
4147 
4148  int32_t len=0;
4149  char* solver=get_str_from_str_or_direct(len);
4150 
4151  bool success=ui_classifier->set_solver(solver);
4152 
4153  SG_FREE(solver);
4154  return success;
4155 }
4156 
4157 bool CSGInterface::cmd_set_constraint_generator()
4158 {
4159  if (m_nrhs<2 || !create_return_values(0))
4160  return false;
4161 
4162  int32_t len=0;
4163  char* cg=get_str_from_str_or_direct(len);
4164 
4165  bool success=ui_classifier->set_constraint_generator(cg);
4166 
4167  SG_FREE(cg);
4168  return success;
4169 }
4170 
4171 bool CSGInterface::cmd_set_prior_probs()
4172 {
4173  if (m_nrhs<3 || !create_return_values(0))
4174  return false;
4175 
4176  CSalzbergWordStringKernel* kernel=
4177  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4178  if (kernel->get_kernel_type()!=K_SALZBERG)
4179  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n")
4180 
4181  float64_t pos_probs=get_real_from_real_or_str();
4182  float64_t neg_probs=get_real_from_real_or_str();
4183 
4184  kernel->set_prior_probs(pos_probs, neg_probs);
4185 
4186  return true;
4187 }
4188 
4189 bool CSGInterface::cmd_set_prior_probs_from_labels()
4190 {
4191  if (m_nrhs<2 || !create_return_values(0))
4192  return false;
4193 
4194  CSalzbergWordStringKernel* kernel=
4195  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4196  if (kernel->get_kernel_type()!=K_SALZBERG)
4197  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n")
4198 
4199  float64_t* lab=NULL;
4200  int32_t len=0;
4201  get_vector(lab, len);
4202 
4203  CBinaryLabels* labels=new CBinaryLabels(len);
4204  for (int32_t i=0; i<len; i++)
4205  {
4206  if (!labels->set_label(i, lab[i]))
4207  SG_ERROR("Couldn't set label %d (of %d): %f.\n", i, len, lab[i])
4208  }
4209  SG_FREE(lab);
4210 
4211  kernel->set_prior_probs_from_labels(labels);
4212 
4213  SG_UNREF(labels);
4214  return true;
4215 }
4216 
4217 #ifdef USE_SVMLIGHT
4218 bool CSGInterface::cmd_resize_kernel_cache()
4219 {
4220  if (m_nrhs<2 || !create_return_values(0))
4221  return false;
4222 
4223  int32_t size=get_int_from_int_or_str();
4224  return ui_kernel->resize_kernel_cache(size);
4225 }
4226 #endif //USE_SVMLIGHT
4227 
4228 
/** Create a distance object from a type name and a data type name and install it.
 *
 * Reads two strings (distance type, data type). MINKOWSKI additionally reads a
 * real parameter k and is only matched when m_nrhs==4; HAMMING/WORD reads an
 * optional boolean when m_nrhs==4. All other combinations map to
 * ui_distance->create_generic() with the matching D_* constant. When no branch
 * assigns it, `distance` stays NULL and is passed to set_distance() as such.
 *
 * NOTE(review): embedded listing line 4304 (the body of the final `else`) is
 * absent from this excerpt; as printed here the `else` would bind to
 * SG_FREE(type) - restore the missing statement from the original file.
 */
4231 bool CSGInterface::cmd_set_distance()
4232 {
4233  if (m_nrhs<3 || !create_return_values(0))
4234  return false;
4235 
4236  CDistance* distance=NULL;
4237  int32_t len=0;
4238  char* type=get_str_from_str_or_direct(len);
4239  char* dtype=get_str_from_str_or_direct(len);
4240 
4241  if (strmatch(type, "MINKOWSKI") && m_nrhs==4)
4242  {
4243  float64_t k=get_real_from_real_or_str();
4244  distance=ui_distance->create_minkowski(k);
4245  }
4246  else if (strmatch(type, "MANHATTAN"))
4247  {
4248  if (strmatch(dtype, "REAL"))
4249  distance=ui_distance->create_generic(D_MANHATTAN);
4250  else if (strmatch(dtype, "WORD"))
4251  distance=ui_distance->create_generic(D_MANHATTANWORD);
4252  }
4253  else if (strmatch(type, "HAMMING") && strmatch(dtype, "WORD"))
4254  {
4255  bool use_sign=false;
4256  if (m_nrhs==4)
4257  use_sign=get_bool_from_bool_or_str(); // optional
4258 
4259  distance=ui_distance->create_hammingword(use_sign);
4260  }
4261  else if (strmatch(type, "CANBERRA"))
4262  {
4263  if (strmatch(dtype, "REAL"))
4264  distance=ui_distance->create_generic(D_CANBERRA);
4265  else if (strmatch(dtype, "WORD"))
4266  distance=ui_distance->create_generic(D_CANBERRAWORD);
4267  }
4268  else if (strmatch(type, "CHEBYSHEW") && strmatch(dtype, "REAL"))
4269  {
4270  distance=ui_distance->create_generic(D_CHEBYSHEW);
4271  }
4272  else if (strmatch(type, "GEODESIC") && strmatch(dtype, "REAL"))
4273  {
4274  distance=ui_distance->create_generic(D_GEODESIC);
4275  }
4276  else if (strmatch(type, "JENSEN") && strmatch(dtype, "REAL"))
4277  {
4278  distance=ui_distance->create_generic(D_JENSEN);
4279  }
4280  else if (strmatch(type, "CHISQUARE") && strmatch(dtype, "REAL"))
4281  {
4282  distance=ui_distance->create_generic(D_CHISQUARE);
4283  }
4284  else if (strmatch(type, "TANIMOTO") && strmatch(dtype, "REAL"))
4285  {
4286  distance=ui_distance->create_generic(D_TANIMOTO);
4287  }
4288  else if (strmatch(type, "COSINE") && strmatch(dtype, "REAL"))
4289  {
4290  distance=ui_distance->create_generic(D_COSINE);
4291  }
4292  else if (strmatch(type, "BRAYCURTIS") && strmatch(dtype, "REAL"))
4293  {
4294  distance=ui_distance->create_generic(D_BRAYCURTIS);
4295  }
4296  else if (strmatch(type, "EUCLIDEAN"))
4297  {
4298  if (strmatch(dtype, "REAL"))
4299  distance=ui_distance->create_generic(D_EUCLIDEAN);
4300  else if (strmatch(dtype, "SPARSEREAL"))
4301  distance=ui_distance->create_generic(D_SPARSEEUCLIDEAN);
4302  }
4303  else
// NOTE(review): the statement belonging to this else (listing line 4304) is missing here
4305 
4306  SG_FREE(type);
4307  SG_FREE(dtype);
4308  return ui_distance->set_distance(distance);
4309 }
4310 
/** Command handler for initialising the distance; as shown it only returns true.
 *
 * NOTE(review): embedded listing line 4313 is absent from this excerpt, so
 * the body may contain one more statement in the original file.
 */
4311 bool CSGInterface::cmd_init_distance()
4312 {
4314  return true;
4315 }
4316 
4317 bool CSGInterface::cmd_get_distance_matrix()
4318 {
4319  if (m_nrhs!=2 || !create_return_values(1))
4320  return false;
4321 
4322  int32_t len=0;
4323  char* target=get_string(len);
4324 
4325  bool success=ui_distance->init_distance(target);
4326 
4327  if (success)
4328  {
4329  CDistance* distance=ui_distance->get_distance();
4330  if (!distance || !distance->has_features())
4331  SG_ERROR("No distance defined or not initialized.\n")
4332 
4333  SGMatrix<float64_t> dmatrix=distance->get_distance_matrix();
4334  set_matrix(dmatrix.matrix, dmatrix.num_rows, dmatrix.num_cols);
4335  }
4336 
4337  return success;
4338 }
4339 
4340 
4341 /* POIM */
4342 
4343 bool CSGInterface::cmd_get_SPEC_consensus()
4344 {
4345  if (m_nrhs!=1 || !create_return_values(1))
4346  return false;
4347 
4348  CKernel* kernel=ui_kernel->get_kernel();
4349  if (!kernel)
4350  SG_ERROR("No kernel.\n")
4351  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4352  SG_ERROR("Only works for CommWordString kernels.\n")
4353 
4354  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4355  ASSERT(svm)
4356  int32_t num_suppvec=svm->get_num_support_vectors();
4357  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4358  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4359  int32_t num_feat=0;
4360 
4361  for (int32_t i=0; i<num_suppvec; i++)
4362  {
4363  sv_idx[i]=svm->get_support_vector(i);
4364  sv_weight[i]=svm->get_alpha(i);
4365  }
4366 
4367  char* consensus=((CCommWordStringKernel*) kernel)->compute_consensus(
4368  num_feat, num_suppvec, sv_idx, sv_weight);
4369  SG_FREE(sv_idx);
4370  SG_FREE(sv_weight);
4371 
4372  set_vector(consensus, num_feat);
4373  SG_FREE(consensus);
4374 
4375  return true;
4376 }
4377 
4378 bool CSGInterface::cmd_get_SPEC_scoring()
4379 {
4380  if (m_nrhs!=2 || !create_return_values(1))
4381  return false;
4382 
4383  int32_t max_order=get_int();
4384  CKernel* kernel=ui_kernel->get_kernel();
4385  if (!kernel)
4386  SG_ERROR("No kernel.\n")
4387 
4388  EKernelType ktype=kernel->get_kernel_type();
4389  if (ktype!=K_COMMWORDSTRING && ktype!=K_WEIGHTEDCOMMWORDSTRING)
4390  SG_ERROR("Only works for (Weighted) CommWordString kernels.\n")
4391 
4392  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4393  ASSERT(svm)
4394  int32_t num_suppvec=svm->get_num_support_vectors();
4395  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4396  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4397  int32_t num_feat=0;
4398  int32_t num_sym=0;
4399 
4400  for (int32_t i=0; i<num_suppvec; i++)
4401  {
4402  sv_idx[i]=svm->get_support_vector(i);
4403  sv_weight[i]=svm->get_alpha(i);
4404  }
4405 
4406  if ((max_order<1) || (max_order>8))
4407  {
4408  SG_WARNING("max_order out of range 1..8 (%d). setting to 1\n", max_order)
4409  max_order=1;
4410  }
4411 
4412  float64_t* position_weights=NULL;
4413  if (ktype==K_COMMWORDSTRING)
4414  position_weights=((CCommWordStringKernel*) kernel)->compute_scoring(
4415  max_order, num_feat, num_sym, NULL,
4416  num_suppvec, sv_idx, sv_weight);
4417  else
4418  position_weights=((CWeightedCommWordStringKernel*) kernel)->compute_scoring(
4419  max_order, num_feat, num_sym, NULL,
4420  num_suppvec, sv_idx, sv_weight);
4421  SG_FREE(sv_idx);
4422  SG_FREE(sv_weight);
4423 
4424  set_matrix(position_weights, num_sym, num_feat);
4425  SG_FREE(position_weights);
4426 
4427  return true;
4428 }
4429 
4430 bool CSGInterface::cmd_get_WD_consensus()
4431 {
4432  if (m_nrhs!=1 || !create_return_values(1))
4433  return false;
4434 
4435  CKernel* kernel=ui_kernel->get_kernel();
4436  if (!kernel)
4437  SG_ERROR("No kernel.\n")
4438  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4439  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4440 
4441  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4442  ASSERT(svm)
4443  int32_t num_suppvec=svm->get_num_support_vectors();
4444  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4445  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4446  int32_t num_feat=0;
4447 
4448  for (int32_t i=0; i<num_suppvec; i++)
4449  {
4450  sv_idx[i]=svm->get_support_vector(i);
4451  sv_weight[i]=svm->get_alpha(i);
4452  }
4453 
4454  char* consensus=((CWeightedDegreePositionStringKernel*) kernel)->compute_consensus(
4455  num_feat, num_suppvec, sv_idx, sv_weight);
4456  SG_FREE(sv_idx);
4457  SG_FREE(sv_weight);
4458 
4459  set_vector(consensus, num_feat);
4460  SG_FREE(consensus);
4461 
4462  return true;
4463 }
4464 
/** Compute POIMs for a weighted degree position kernel.
 *
 * Reads an integer max_order and a distribution matrix whose dimensions must
 * be num_sym x seqlen (as obtained from the kernel's lhs string features),
 * collects support vector indices and alphas from the current SVM and returns
 * the result of compute_POIM() via set_matrix(..., num_sym, seqlen).
 *
 * NOTE(review): embedded listing line 4487 (the start of the declaration of
 * `sfeat`) is absent from this excerpt.
 * NOTE(review): `distribution` is not released in this function although
 * other commands SG_FREE buffers filled by get_matrix() - confirm ownership.
 */
4465 bool CSGInterface::cmd_compute_POIM_WD()
4466 {
4467  if (m_nrhs!=3 || !create_return_values(1))
4468  return false;
4469 
4470  int32_t max_order=get_int();
4471  float64_t* distribution=NULL;
4472  int32_t num_dfeat=0;
4473  int32_t num_dvec=0;
4474  get_matrix(distribution, num_dfeat, num_dvec);
4475 
4476  if (!distribution)
4477  SG_ERROR("Wrong distribution.\n")
4478 
4479  CKernel* kernel=ui_kernel->get_kernel();
4480  if (!kernel)
4481  SG_ERROR("No Kernel.\n")
4482  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4483  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4484 
4485  int32_t seqlen=0;
4486  int32_t num_sym=0;
// NOTE(review): first half of the `sfeat` declaration (listing line 4487) is missing here
4488  (((CWeightedDegreePositionStringKernel*) kernel)->get_lhs());
4489  ASSERT(sfeat)
4490  seqlen=sfeat->get_max_vector_length();
4491  num_sym=(int32_t) sfeat->get_num_symbols();
4492 
4493  if (num_dvec!=seqlen || num_dfeat!=num_sym)
4494  {
4495  SG_ERROR("distribution should have (seqlen x num_sym) elements"
4496  "(seqlen: %d vs. %d symbols: %d vs. %d)\n", seqlen,
4497  num_dvec, num_sym, num_dfeat);
4498  }
4499 
4500  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4501  ASSERT(svm)
// copy support vector indices and alphas into plain arrays for compute_POIM()
4502  int32_t num_suppvec=svm->get_num_support_vectors();
4503  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4504  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4505 
4506  for (int32_t i=0; i<num_suppvec; i++)
4507  {
4508  sv_idx[i]=svm->get_support_vector(i);
4509  sv_weight[i]=svm->get_alpha(i);
4510  }
4511 
// the range check on max_order below is disabled in the original
4512  /*
4513  if ((max_order < 1) || (max_order > 12))
4514  {
4515  SG_WARNING("max_order out of range 1..12 (%d). setting to 1.\n", max_order)
4516  max_order=1;
4517  }
4518  */
4519 
4520  float64_t* position_weights;
4521  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->compute_POIM(
4522  max_order, seqlen, num_sym, NULL,
4523  num_suppvec, sv_idx, sv_weight, distribution);
4524  SG_FREE(sv_idx);
4525  SG_FREE(sv_weight);
4526 
4527  set_matrix(position_weights, num_sym, seqlen);
4528  SG_FREE(position_weights);
4529 
4530  return true;
4531  }
4532 
4533  bool CSGInterface::cmd_get_WD_scoring()
4534  {
4535  if (m_nrhs!=2 || !create_return_values(1))
4536  return false;
4537 
4538  int32_t max_order=get_int();
4539 
4540  CKernel* kernel=ui_kernel->get_kernel();
4541  if (!kernel)
4542  SG_ERROR("No kernel.\n")
4543  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4544  SG_ERROR("Only works for Weighted Degree Position kernels.\n")
4545 
4546  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4547  ASSERT(svm)
4548  int32_t num_suppvec=svm->get_num_support_vectors();
4549  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4550  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4551  int32_t num_feat=0;
4552  int32_t num_sym=0;
4553 
4554  for (int32_t i=0; i<num_suppvec; i++)
4555  {
4556  sv_idx[i]=svm->get_support_vector(i);
4557  sv_weight[i]=svm->get_alpha(i);
4558  }
4559 
4560  if ((max_order<1) || (max_order>12))
4561  {
4562  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order)
4563  max_order=1;
4564  }
4565 
4566  float64_t* position_weights=
4567  ((CWeightedDegreePositionStringKernel*) kernel)->compute_scoring(
4568  max_order, num_feat, num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4569  SG_FREE(sv_idx);
4570  SG_FREE(sv_weight);
4571 
4572  set_matrix(position_weights, num_sym, num_feat);
4573  SG_FREE(position_weights);
4574 
4575  return true;
4576 }
4577 
4578 
4579 /* Classifier */
4580 
/** Classify with the current classifier and return one value per label.
 *
 * Unless a kernel of type K_CUSTOM is set, test features must be available.
 * The labels returned by ui_classifier->classify() are converted to a real
 * vector: get_label() for regression and multiclass labels, get_value() for
 * binary labels.
 *
 * NOTE(review): embedded listing line 4615 (the statement under `default:`)
 * is absent from this excerpt; as shown, other label types leave the value 0.
 */
4581 bool CSGInterface::cmd_classify()
4582 {
4583  if (m_nrhs!=1 || !create_return_values(1))
4584  return false;
4585 
4586  if (!ui_kernel->get_kernel() ||
4587  ui_kernel->get_kernel()->get_kernel_type()!=K_CUSTOM)
4588  {
4589  CFeatures* feat=ui_features->get_test_features();
4590  if (!feat)
4591  SG_ERROR("No features found.\n")
4592  }
4593 
4594  CLabels* labels=ui_classifier->classify();
4595  if (!labels)
4596  SG_ERROR("Classify failed\n")
4597 
4598  int32_t num_vec=labels->get_num_labels();
4599  float64_t* result=SG_MALLOC(float64_t, num_vec);
4600  for (int32_t i=0; i<num_vec; i++)
4601  {
4602  float64_t value = 0;
4603  switch (labels->get_label_type())
4604  {
4605  case LT_REGRESSION:
4606  value = ((CRegressionLabels*) labels)->get_label(i);
4607  break;
4608  case LT_BINARY:
4609  value = ((CBinaryLabels*) labels)->get_value(i);
4610  break;
4611  case LT_MULTICLASS:
4612  value = ((CMulticlassLabels*) labels)->get_label(i);
4613  break;
4614  default:
// NOTE(review): the statement for unsupported label types (listing line 4615) is missing here
4616  break;
4617  }
4618  result[i]=value;
4619  }
4620  SG_UNREF(labels);
4621 
4622  set_vector(result, num_vec);
4623  SG_FREE(result);
4624 
4625  return true;
4626 }
4627 
4628 bool CSGInterface::cmd_classify_example()
4629 {
4630  if (m_nrhs!=2 || !create_return_values(1))
4631  return false;
4632 
4633  int32_t idx=get_int();
4634  float64_t result=0;
4635 
4636  if (!ui_classifier->classify_example(idx, result))
4637  SG_ERROR("Classify_example failed.\n")
4638 
4639  set_real(result);
4640 
4641  return true;
4642 }
4643 
4644 bool CSGInterface::cmd_get_classifier()
4645 {
4646  if (m_nrhs<1 || m_nrhs>2 || !create_return_values(2))
4647  return false;
4648 
4649  int32_t idx=-1;
4650  if (m_nrhs==2)
4651  idx=get_int();
4652 
4653  float64_t* bias=NULL;
4654  float64_t* weights=NULL;
4655  int32_t rows=0;
4656  int32_t cols=0;
4657  int32_t brows=0;
4658  int32_t bcols=0;
4659 
4660  if (!ui_classifier->get_trained_classifier(
4661  weights, rows, cols, bias, brows, bcols, idx))
4662  return false;
4663 
4664  //SG_PRINT("brows %d, bcols %d\n", brows, bcols)
4665  //CMath::display_matrix(bias, brows, bcols);
4666  set_matrix(bias, brows, bcols);
4667  SG_FREE(bias);
4668 
4669  //SG_PRINT("rows %d, cols %d\n", rows, cols)
4670  //CMath::display_matrix(weights, rows, cols);
4671  set_matrix(weights, rows, cols);
4672  SG_FREE(weights);
4673 
4674  return true;
4675 }
4676 
4677 bool CSGInterface::cmd_new_classifier()
4678 {
4679  if (m_nrhs<2 || !create_return_values(0))
4680  return false;
4681 
4682  int32_t len=0;
4683  char* name=get_str_from_str_or_direct(len);
4684  int32_t d=6;
4685  int32_t from_d=40;
4686 
4687  if (m_nrhs>2)
4688  {
4689  d=get_int_from_int_or_str();
4690 
4691  if (m_nrhs>3)
4692  from_d=get_int_from_int_or_str();
4693  }
4694 
4695  bool success=ui_classifier->new_classifier(name, d, from_d);
4696 
4697  SG_FREE(name);
4698  return success;
4699 }
4700 
4701 bool CSGInterface::cmd_save_classifier()
4702 {
4703  if (m_nrhs<2 || !create_return_values(0))
4704  return false;
4705 
4706  int32_t len=0;
4707  char* filename=get_str_from_str_or_direct(len);
4708 
4709  bool success=ui_classifier->save(filename);
4710 
4711  SG_FREE(filename);
4712  return success;
4713 }
4714 
4715 bool CSGInterface::cmd_load_classifier()
4716 {
4717  if (m_nrhs<3 || !create_return_values(0))
4718  return false;
4719 
4720  int32_t len=0;
4721  char* filename=get_str_from_str_or_direct(len);
4722  char* type=get_str_from_str_or_direct(len);
4723 
4724  bool success=ui_classifier->load(filename, type);
4725 
4726  if (dynamic_cast<CKernelMachine*>(ui_classifier->get_classifier()))
4727  {
4728  CKernelMachine* kernel_machine = dynamic_cast<CKernelMachine*>(ui_classifier->get_classifier());
4729  ui_features->set_train_features(kernel_machine->get_kernel()->get_lhs());
4730  ui_features->set_test_features(kernel_machine->get_kernel()->get_rhs());
4731  ui_kernel->set_kernel(kernel_machine->get_kernel());
4732  }
4733 
4734  SG_FREE(filename);
4735  SG_FREE(type);
4736  return success;
4737 }
4738 
4739 bool CSGInterface::cmd_get_num_svms()
4740 {
4741  if (m_nrhs!=1 || !create_return_values(1))
4742  return false;
4743 
4744  set_int(ui_classifier->get_num_svms());
4745 
4746  return true;
4747 }
4748 
4749 
4750 bool CSGInterface::cmd_get_svm()
4751 {
4752  return cmd_get_classifier();
4753 }
4754 
4755 bool CSGInterface::cmd_set_svm()
4756 {
4757  if (m_nrhs!=3 || !create_return_values(0))
4758  return false;
4759 
4760  float64_t bias=get_real();
4761 
4762  float64_t* alphas=NULL;
4763  int32_t num_feat_alphas=0;
4764  int32_t num_vec_alphas=0;
4765  get_matrix(alphas, num_feat_alphas, num_vec_alphas);
4766 
4767  if (!alphas)
4768  SG_ERROR("No proper alphas given.\n")
4769  if (num_vec_alphas!=2)
4770  SG_ERROR("Not 2 vectors in alphas.\n")
4771 
4772  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4773  if (!svm)
4774  SG_ERROR("No SVM object available.\n")
4775 
4776  svm->create_new_model(num_feat_alphas);
4777  svm->set_bias(bias);
4778 
4779  int32_t num_support_vectors=svm->get_num_support_vectors();
4780  for (int32_t i=0; i<num_support_vectors; i++)
4781  {
4782  svm->set_alpha(i, alphas[i]);
4783  svm->set_support_vector(i, (int32_t) alphas[i+num_support_vectors]);
4784  }
4785  SG_FREE(alphas);
4786 
4787  return true;
4788 }
4789 
4790 bool CSGInterface::cmd_set_linear_classifier()
4791 {
4792  if (m_nrhs!=3 || !create_return_values(0))
4793  return false;
4794 
4795  float64_t bias=get_real();
4796 
4797  float64_t* w=NULL;
4798  int32_t len=0;
4799  get_vector(w, len);
4800 
4801  if (!len)
4802  SG_ERROR("No proper weight vector given.\n")
4803 
4804  CLinearMachine* c=(CLinearMachine*) ui_classifier->get_classifier();
4805  if (!c)
4806  SG_ERROR("No Linear Classifier object available.\n")
4807 
4808  c->set_w(SGVector<float64_t>(w, len));
4809  c->set_bias(bias);
4810  return true;
4811 }
4812 
4813 bool CSGInterface::cmd_get_svm_objective()
4814 {
4815  if (m_nrhs!=1 || !create_return_values(1))
4816  return false;
4817 
4818  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4819  if (!svm)
4820  SG_ERROR("No SVM set.\n")
4821 
4822  set_real(svm->get_objective());
4823 
4824  return true;
4825 }
4826 
4827 bool CSGInterface::cmd_compute_svm_primal_objective()
4828 {
4829  return do_compute_objective(SVM_PRIMAL);
4830 }
4831 
4832 bool CSGInterface::cmd_compute_svm_dual_objective()
4833 {
4834  return do_compute_objective(SVM_DUAL);
4835 }
4836 
4837 bool CSGInterface::cmd_compute_mkl_dual_objective()
4838 {
4839  return do_compute_objective(MKL_DUAL);
4840 }
4841 
4842 bool CSGInterface::cmd_compute_relative_mkl_duality_gap()
4843 {
4844  return do_compute_objective(MKL_RELATIVE_DUALITY_GAP);
4845 }
4846 
4847 bool CSGInterface::cmd_compute_absolute_mkl_duality_gap()
4848 {
4849  return do_compute_objective(MKL_ABSOLUTE_DUALITY_GAP);
4850 }
4851 
/** Compute one of the SVM/MKL objective values and return it as a real.
 *
 * Requires an SVM, training labels and an initialised kernel with features;
 * labels and kernel are set on the SVM before the requested quantity is
 * computed.
 *
 * @param obj which objective (or duality gap) to compute
 * @return true on success, false when the argument/return value check fails
 *         or obj is not handled
 *
 * NOTE(review): embedded listing lines 4889, 4893, 4898 and 4906 (one line at
 * the start of each MKL case) are absent from this excerpt.
 * NOTE(review): in both duality gap cases the local named `primal` is filled
 * from compute_mkl_dual_objective() and `dual` from
 * compute_mkl_primal_objective() - the names look swapped; confirm the
 * intended sign/denominator before changing anything.
 */
4852 bool CSGInterface::do_compute_objective(E_WHICH_OBJ obj)
4853 {
4854  if (m_nrhs!=1 || !create_return_values(1))
4855  return false;
4856 
// initial value only; every handled case below overwrites it
4857  float64_t result=23.5;
4858 
4859  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4860  if (!svm)
4861  SG_ERROR("No SVM set.\n")
4862 
4863  CLabels* trainlabels=NULL;
4864  trainlabels=ui_labels->get_train_labels();
4865 
4866  if (!trainlabels)
4867  SG_ERROR("No trainlabels available.\n")
4868 
4869  CKernel* kernel=ui_kernel->get_kernel();
4870  if (!kernel)
4871  SG_ERROR("No kernel available.\n")
4872 
4873  if (!ui_kernel->is_initialized() || !kernel->has_features())
4874  SG_ERROR("Kernel not initialized.\n")
4875 
4876  ((CKernelMachine*) svm)->set_labels(trainlabels);
4877  ((CKernelMachine*) svm)->set_kernel(kernel);
4878 
4879 
4880  switch (obj)
4881  {
4882  case SVM_PRIMAL:
4883  result=svm->compute_svm_primal_objective();
4884  break;
4885  case SVM_DUAL:
4886  result=svm->compute_svm_dual_objective();
4887  break;
4888  case MKL_PRIMAL:
// NOTE(review): listing line 4889 is missing here
4890  result=((CMKL*) svm)->compute_mkl_primal_objective();
4891  break;
4892  case MKL_DUAL:
// NOTE(review): listing line 4893 is missing here
4894  result=((CMKL*) svm)->compute_mkl_dual_objective();
4895  break;
4896  case MKL_RELATIVE_DUALITY_GAP:
4897  {
// NOTE(review): listing line 4898 is missing here
4899  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4900  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4901  result=(primal-dual)/dual;
4902  }
4903  break;
4904  case MKL_ABSOLUTE_DUALITY_GAP:
4905  {
// NOTE(review): listing line 4906 is missing here
4907  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4908  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4909  result=dual-primal;
4910  }
4911  break;
4912  default:
4913  SG_SERROR("Error calling do_compute_objective\n")
4914  return false;
4915  };
4916 
4917  set_real(result);
4918  return true;
4919 }
4920 
/** Train the current classifier, dispatching on its classifier type.
 *
 * Depending on the type, additional arguments are read:
 *  - CT_KNN: integer k (needs m_nrhs>=2)
 *  - CT_KMEANS: integers k and max_iter (needs m_nrhs>=3)
 *  - CT_HIERARCHICAL: integer merges (needs m_nrhs>=2)
 *  - CT_LDA: optional real gamma, read only when m_nrhs==2
 * Unknown types raise an error.
 *
 * @return result of the selected ui_classifier training routine; false when
 *         the argument check fails or required arguments are missing
 *
 * NOTE(review): embedded listing line 4956 (the case label in front of
 * train_krr()) is absent from this excerpt.
 */
4921 bool CSGInterface::cmd_train_classifier()
4922 {
4923  if (m_nrhs<1 || !create_return_values(0))
4924  return false;
4925 
4926  CMachine* classifier=ui_classifier->get_classifier();
4927  if (!classifier)
4928  SG_ERROR("No classifier available.\n")
4929 
4930  EMachineType type=classifier->get_classifier_type();
4931  switch (type)
4932  {
4933  case CT_LIGHT:
4934  case CT_LIGHTONECLASS:
4935  case CT_LIBSVM:
4936  case CT_SCATTERSVM:
4937  case CT_MPD:
4938  case CT_GPBT:
4939  case CT_CPLEXSVM:
4940  case CT_GMNPSVM:
4941  case CT_GNPPSVM:
4942  case CT_KERNELPERCEPTRON:
4943  case CT_LIBSVR:
4944  case CT_LIBSVMMULTICLASS:
4945  case CT_LIBSVMONECLASS:
4946  case CT_SVRLIGHT:
4947  case CT_LARANK:
4948  return ui_classifier->train_svm();
4949  case CT_MKLMULTICLASS:
4950  return ui_classifier->train_mkl_multiclass();
4951  case CT_MKLCLASSIFICATION:
4952  case CT_MKLREGRESSION:
4953  case CT_MKLONECLASS:
4954  return ui_classifier->train_mkl();
4955 
// NOTE(review): the case label for the next return (listing line 4956) is missing here
4957  return ui_classifier->train_krr();
4958 
4959  case CT_KNN:
4960  {
4961  if (m_nrhs<2)
4962  return false;
4963 
4964  int32_t k=get_int_from_int_or_str();
4965 
4966  return ui_classifier->train_knn(k);
4967  }
4968 
4969  case CT_KMEANS:
4970  {
4971  if (m_nrhs<3)
4972  return false;
4973 
4974  int32_t k=get_int_from_int_or_str();
4975  int32_t max_iter=get_int_from_int_or_str();
4976 
4977  return ui_classifier->train_clustering(k, max_iter);
4978  }
4979 
4980  case CT_HIERARCHICAL:
4981  {
4982  if (m_nrhs<2)
4983  return false;
4984 
4985  int32_t merges=get_int_from_int_or_str();
4986 
4987  return ui_classifier->train_clustering(merges);
4988  }
4989 
4990  case CT_LDA:
4991  {
4992  float64_t gamma=0;
4993  if (m_nrhs==2)
4994  gamma=get_real_from_real_or_str();
4995 
4996  return ui_classifier->train_linear(gamma);
4997  }
4998 
4999  case CT_PERCEPTRON:
5000  case CT_SVMLIN:
5001  case CT_SVMPERF:
5002  case CT_SVMOCAS:
5003  case CT_SVMSGD:
5004  case CT_LPM:
5005  case CT_LPBOOST:
5006  case CT_LIBLINEAR:
5007  return ui_classifier->train_linear();
5008 
5009 #ifdef USE_GPL_SHOGUN
5010  case CT_WDSVMOCAS:
5011  return ui_classifier->train_wdocas();
5012 #endif
5013 
5014  default:
5015  SG_ERROR("Unknown classifier type %d.\n", type)
5016  }
5017 
5018  return false;
5019 }
5020 
5021 bool CSGInterface::cmd_do_auc_maximization()
5022 {
5023  if (m_nrhs!=2 || !create_return_values(0))
5024  return false;
5025 
5026  bool do_auc=get_bool_from_bool_or_str();
5027 
5028  return ui_classifier->set_do_auc_maximization(do_auc);
5029 }
5030 
5031 bool CSGInterface::cmd_set_perceptron_parameters()
5032 {
5033  if (m_nrhs!=3 || !create_return_values(0))
5034  return false;
5035 
5036  float64_t lernrate=get_real_from_real_or_str();
5037  int32_t maxiter=get_int_from_int_or_str();
5038 
5039  return ui_classifier->set_perceptron_parameters(lernrate, maxiter);
5040 }
5041 
5042 bool CSGInterface::cmd_set_svm_qpsize()
5043 {
5044  if (m_nrhs!=2 || !create_return_values(0))
5045  return false;
5046 
5047  int32_t qpsize=get_int_from_int_or_str();
5048 
5049  return ui_classifier->set_svm_qpsize(qpsize);
5050 }
5051 
5052 bool CSGInterface::cmd_set_svm_max_qpsize()
5053 {
5054  if (m_nrhs!=2 || !create_return_values(0))
5055  return false;
5056 
5057  int32_t max_qpsize=get_int_from_int_or_str();
5058 
5059  return ui_classifier->set_svm_max_qpsize(max_qpsize);
5060 }
5061 
5062 bool CSGInterface::cmd_set_svm_bufsize()
5063 {
5064  if (m_nrhs!=2 || !create_return_values(0))
5065  return false;
5066 
5067  int32_t bufsize=get_int_from_int_or_str();
5068 
5069  return ui_classifier->set_svm_bufsize(bufsize);
5070 }
5071 
5072 bool CSGInterface::cmd_set_svm_C()
5073 {
5074  if (m_nrhs<2 || !create_return_values(0))
5075  return false;
5076 
5077  float64_t C1=get_real_from_real_or_str();
5078  float64_t C2=C1;
5079 
5080  if (m_nrhs==3)
5081  C2=get_real_from_real_or_str();
5082 
5083  return ui_classifier->set_svm_C(C1, C2);
5084 }
5085 
5086 bool CSGInterface::cmd_set_svm_epsilon()
5087 {
5088  if (m_nrhs!=2 || !create_return_values(0))
5089  return false;
5090 
5091  float64_t epsilon=get_real_from_real_or_str();
5092 
5093  return ui_classifier->set_svm_epsilon(epsilon);
5094 }
5095 
5096 bool CSGInterface::cmd_set_svr_tube_epsilon()
5097 {
5098  if (m_nrhs!=2 || !create_return_values(0))
5099  return false;
5100 
5101  float64_t tube_epsilon=get_real_from_real_or_str();
5102 
5103  return ui_classifier->set_svr_tube_epsilon(tube_epsilon);
5104 }
5105 
5106 bool CSGInterface::cmd_set_svm_nu()
5107 {
5108  if (m_nrhs!=2 || !create_return_values(0))
5109  return false;
5110 
5111  float64_t nu=get_real_from_real_or_str();
5112 
5113  return ui_classifier->set_svm_nu(nu);
5114 }
5115 
5116 bool CSGInterface::cmd_set_svm_mkl_parameters()
5117 {
5118  if (m_nrhs<3 || m_nrhs>4 || !create_return_values(0))
5119  return false;
5120 
5121  float64_t weight_epsilon=get_real_from_real_or_str();
5122  float64_t C_mkl=get_real_from_real_or_str();
5123  float64_t mkl_norm=1.0;
5124 
5125  if (m_nrhs==4)
5126  mkl_norm=get_real_from_real_or_str();
5127 
5128  return ui_classifier->set_svm_mkl_parameters(weight_epsilon, C_mkl, mkl_norm);
5129 }
5130 
5131 bool CSGInterface::cmd_set_elasticnet_lambda()
5132 {
5133  if (m_nrhs!=2 || !create_return_values(0))
5134  return false;
5135  float64_t lambda=get_real_from_real_or_str();
5136  return ui_classifier->set_elasticnet_lambda(lambda);
5137 }
5138 
5139 bool CSGInterface::cmd_set_mkl_block_norm()
5140 {
5141  if (m_nrhs!=2 || !create_return_values(0))
5142  return false;
5143  float64_t bnorm=get_real_from_real_or_str();
5144  return ui_classifier->set_mkl_block_norm(bnorm);
5145 }
5146 
5147 
5148 bool CSGInterface::cmd_set_max_train_time()
5149 {
5150  if (m_nrhs!=2 || !create_return_values(0))
5151  return false;
5152 
5153  float64_t max_train_time=get_real_from_real_or_str();
5154 
5155  return ui_classifier->set_max_train_time(max_train_time);
5156 }
5157 
5158 bool CSGInterface::cmd_set_svm_shrinking_enabled()
5159 {
5160  if (m_nrhs!=2 || !create_return_values(0))
5161  return false;
5162 
5163  bool shrinking_enabled=get_bool_from_bool_or_str();
5164 
5165  return ui_classifier->set_svm_shrinking_enabled(shrinking_enabled);
5166 }
5167 
5168 bool CSGInterface::cmd_set_svm_batch_computation_enabled()
5169 {
5170  if (m_nrhs!=2 || !create_return_values(0))
5171  return false;
5172 
5173  bool batch_computation_enabled=get_bool_from_bool_or_str();
5174 
5175  return ui_classifier->set_svm_batch_computation_enabled(
5176  batch_computation_enabled);
5177 }
5178 
5179 bool CSGInterface::cmd_set_svm_linadd_enabled()
5180 {
5181  if (m_nrhs!=2 || !create_return_values(0))
5182  return false;
5183 
5184  bool linadd_enabled=get_bool_from_bool_or_str();
5185 
5186  return ui_classifier->set_svm_linadd_enabled(linadd_enabled);
5187 }
5188 
5189 bool CSGInterface::cmd_set_svm_bias_enabled()
5190 {
5191  if (m_nrhs!=2 || !create_return_values(0))
5192  return false;
5193 
5194  bool bias_enabled=get_bool_from_bool_or_str();
5195 
5196  return ui_classifier->set_svm_bias_enabled(bias_enabled);
5197 }
5198 
5199 bool CSGInterface::cmd_set_mkl_interleaved_enabled()
5200 {
5201  if (m_nrhs!=2 || !create_return_values(0))
5202  return false;
5203 
5204  bool interleaved_enabled=get_bool_from_bool_or_str();
5205 
5206  return ui_classifier->set_mkl_interleaved_enabled(interleaved_enabled);
5207 }
5208 
5209 bool CSGInterface::cmd_set_krr_tau()
5210 {
5211  if (m_nrhs!=2 || !create_return_values(0))
5212  return false;
5213 
5214  float64_t tau=get_real_from_real_or_str();
5215 
5216  return ui_classifier->set_krr_tau(tau);
5217 }
5218 
5219 
5220 /* Preproc */
5221 
5222 bool CSGInterface::cmd_add_preproc()
5223 {
5224  if (m_nrhs<2 || !create_return_values(0))
5225  return false;
5226 
5227  int32_t len=0;
5228  char* type=get_str_from_str_or_direct(len);
5229  CPreprocessor* preproc=NULL;
5230 
5231  if (strmatch(type, "NORMONE"))
5232  preproc=ui_preproc->create_generic(P_NORMONE);
5233  else if (strmatch(type, "LOGPLUSONE"))
5234  preproc=ui_preproc->create_generic(P_LOGPLUSONE);
5235  else if (strmatch(type, "SORTWORDSTRING"))
5236  preproc=ui_preproc->create_generic(P_SORTWORDSTRING);
5237  else if (strmatch(type, "SORTULONGSTRING"))
5238  preproc=ui_preproc->create_generic(P_SORTULONGSTRING);
5239  else if (strmatch(type, "DECOMPRESSCHARSTRING"))
5240  preproc=ui_preproc->create_generic(P_DECOMPRESSCHARSTRING);
5241  else if (strmatch(type, "SORTWORD"))
5242  preproc=ui_preproc->create_generic(P_SORTWORD);
5243 
5244  else if (strmatch(type, "PRUNEVARSUBMEAN"))
5245  {
5246  bool divide_by_std=false;
5247  if (m_nrhs==3)
5248  divide_by_std=get_bool_from_bool_or_str();
5249 
5250  preproc=ui_preproc->create_prunevarsubmean(divide_by_std);
5251  }
5252 
5253 #ifdef HAVE_LAPACK
5254  else if (strmatch(type, "PCA") && m_nrhs==4)
5255  {
5256  bool do_whitening=get_bool_from_bool_or_str();
5257  float64_t threshold=get_real_from_real_or_str();
5258 
5259  preproc=ui_preproc->create_pca(do_whitening, threshold);
5260  }
5261 #endif
5262 
5263  else
5265 
5266  SG_FREE(type);
5267  return ui_preproc->add_preproc(preproc);
5268 }
5269 
5270 bool CSGInterface::cmd_del_preproc()
5271 {
5272  if (m_nrhs!=1 || !create_return_values(0))
5273  return false;
5274 
5275  return ui_preproc->del_preproc();
5276 }
5277 
5278 bool CSGInterface::cmd_attach_preproc()
5279 {
5280  if (m_nrhs<2 || !create_return_values(0))
5281  return false;
5282 
5283  int32_t len=0;
5284  char* target=get_str_from_str_or_direct(len);
5285 
5286  bool do_force=false;
5287  if (m_nrhs==3)
5288  do_force=get_bool_from_bool_or_str();
5289 
5290  bool success=ui_preproc->attach_preproc(target, do_force);
5291 
5292  SG_FREE(target);
5293  return success;
5294 }
5295 
5296 bool CSGInterface::cmd_clean_preproc()
5297 {
5298  if (m_nrhs!=1 || !create_return_values(0))
5299  return false;
5300 
5301  return ui_preproc->clean_preproc();
5302 }
5303 
5304 /* Converter */
5305 
5306 bool CSGInterface::cmd_set_converter()
5307 {
5308  int32_t len=0;
5309  char* type=get_str_from_str_or_direct(len);
5310 
5311  if (strmatch(type, "lle"))
5312  {
5313  int32_t k = get_int_from_int_or_str();
5314  ui_converter->create_locallylinearembedding(k);
5315  return true;
5316  }
5317  if (strmatch(type, "npe"))
5318  {
5319  int32_t k = get_int_from_int_or_str();
5320  ui_converter->create_neighborhoodpreservingembedding(k);
5321  return true;
5322  }
5323  if (strmatch(type, "ltsa"))
5324  {
5325  int32_t k = get_int_from_int_or_str();
5326  ui_converter->create_localtangentspacealignment(k);
5327  return true;
5328  }
5329  if (strmatch(type, "lltsa"))
5330  {
5331  int32_t k = get_int_from_int_or_str();
5332  ui_converter->create_linearlocaltangentspacealignment(k);
5333  return true;
5334  }
5335  if (strmatch(type, "hlle"))
5336  {
5337  int32_t k = get_int_from_int_or_str();
5338  ui_converter->create_hessianlocallylinearembedding(k);
5339  return true;
5340  }
5341  if (strmatch(type, "laplacian_eigenmaps"))
5342  {
5343  int32_t k = get_int_from_int_or_str();
5344  int32_t width = get_real_from_real_or_str();
5345  ui_converter->create_laplacianeigenmaps(k,width);
5346  return true;
5347  }
5348  if (strmatch(type, "lpp"))
5349  {
5350  int32_t k = get_int_from_int_or_str();
5351  int32_t width = get_real_from_real_or_str();
5352  ui_converter->create_localitypreservingprojections(k,width);
5353  return true;
5354  }
5355  if (strmatch(type, "diffusion_maps"))
5356  {
5357  int32_t t = get_int_from_int_or_str();
5358  int32_t width = get_real_from_real_or_str();
5359  ui_converter->create_diffusionmaps(t,width);
5360  return true;
5361  }
5362  if (strmatch(type, "isomap"))
5363  {
5364  int32_t k = get_int_from_int_or_str();
5365  ui_converter->create_isomap(k);
5366  return true;
5367  }
5368  if (strmatch(type, "mds"))
5369  {
5370  ui_converter->create_multidimensionalscaling();
5371  return true;
5372  }
5373  if (strmatch(type, "jade"))
5374  {
5375  ui_converter->create_jade();
5376  return true;
5377  }
5378  return false;
5379 }
5380 
5381 bool CSGInterface::cmd_apply_converter()
5382 {
5383  if (m_nrhs!=1 || !create_return_values(1))
5384  return false;
5385 
5386  CDenseFeatures<float64_t>* conv_features = ui_converter->apply();
5387  SGMatrix<float64_t> converted_mat = conv_features->get_feature_matrix();
5388  set_matrix(converted_mat.matrix,converted_mat.num_rows,converted_mat.num_cols);
5389  return true;
5390 }
5391 
5392 bool CSGInterface::cmd_embed()
5393 {
5394  int32_t target_dim = get_int_from_int_or_str();
5395 
5396  if (m_nrhs!=1 || !create_return_values(1))
5397  return false;
5398 
5399  CDenseFeatures<float64_t>* embedding = ui_converter->embed(target_dim);
5400  SGMatrix<float64_t> embedding_matrix = embedding->get_feature_matrix();
5401  set_matrix(embedding_matrix.matrix,embedding_matrix.num_cols,embedding_matrix.num_rows);
5402  return true;
5403 }
5404 
5405 /* HMM */
5406 
5407 bool CSGInterface::cmd_new_plugin_estimator()
5408 {
5409  if (m_nrhs<2 || !create_return_values(0))
5410  return false;
5411 
5412  float64_t pos_pseudo=get_real_from_real_or_str();
5413  float64_t neg_pseudo=get_real_from_real_or_str();
5414 
5415  return ui_pluginestimate->new_estimator(pos_pseudo, neg_pseudo);
5416 }
5417 
5418 bool CSGInterface::cmd_train_estimator()
5419 {
5420  if (m_nrhs!=1 || !create_return_values(0))
5421  return false;
5422 
5423  return ui_pluginestimate->train();
5424 }
5425 
5426 bool CSGInterface::cmd_plugin_estimate_classify_example()
5427 {
5428  if (m_nrhs!=2 || !create_return_values(1))
5429  return false;
5430 
5431  int32_t idx=get_int();
5432  float64_t result=ui_pluginestimate->apply_one(idx);
5433 
5434  set_vector(&result, 1);
5435  return true;
5436 }
5437 
5438 bool CSGInterface::cmd_plugin_estimate_classify()
5439 {
5440  if (m_nrhs!=1 || !create_return_values(1))
5441  return false;
5442 
5443  CFeatures* feat=ui_features->get_test_features();
5444  if (!feat)
5445  SG_ERROR("No features found.\n")
5446 
5447  int32_t num_vec=feat->get_num_vectors();
5448  float64_t* result=SG_MALLOC(float64_t, num_vec);
5449  CLabels* labels=ui_pluginestimate->apply();
5450  for (int32_t i=0; i<num_vec; i++)
5451  result[i]=((CRegressionLabels*) labels)->get_label(i);
5452  SG_UNREF(labels);
5453 
5454  set_vector(result, num_vec);
5455  SG_FREE(result);
5456 
5457  return true;
5458 }
5459 
5460 bool CSGInterface::cmd_set_plugin_estimate()
5461 {
5462  if (m_nrhs!=3 || !create_return_values(0))
5463  return false;
5464 
5465  float64_t* emission_probs=NULL;
5466  int32_t num_probs=0;
5467  int32_t num_vec=0;
5468  get_matrix(emission_probs, num_probs, num_vec);
5469 
5470  if (num_vec!=2)
5471  SG_ERROR("Need at least 1 set of positive and 1 set of negative params.\n")
5472 
5473  float64_t* pos_params=emission_probs;
5474  float64_t* neg_params=&(emission_probs[num_probs]);
5475 
5476  float64_t* model_sizes=NULL;
5477  int32_t len=0;
5478  get_vector(model_sizes, len);
5479 
5480  int32_t seq_length=(int32_t) model_sizes[0];
5481  int32_t num_symbols=(int32_t) model_sizes[1];
5482  if (num_probs!=seq_length*num_symbols)
5483  SG_ERROR("Mismatch in number of emission probs and sequence length * number of symbols.\n")
5484 
5485  ui_pluginestimate->get_estimator()->set_model_params(
5486  pos_params, neg_params, seq_length, num_symbols);
5487 
5488  return true;
5489 }
5490 
5491 bool CSGInterface::cmd_get_plugin_estimate()
5492 {
5493  if (m_nrhs!=1 || !create_return_values(2))
5494  return false;
5495 
5496  float64_t* pos_params=NULL;
5497  float64_t* neg_params=NULL;
5498  int32_t num_params=0;
5499  int32_t seq_length=0;
5500  int32_t num_symbols=0;
5501 
5502  if (!ui_pluginestimate->get_estimator()->get_model_params(
5503  pos_params, neg_params, seq_length, num_symbols))
5504  return false;
5505 
5506  num_params=seq_length*num_symbols;
5507 
5508  float64_t* result=SG_MALLOC(float64_t, num_params*2);
5509  for (int32_t i=0; i<num_params; i++)
5510  result[i]=pos_params[i];
5511  for (int32_t i=0; i<num_params; i++)
5512  result[i+num_params]=neg_params[i];
5513 
5514  set_matrix(result, num_params, 2);
5515  SG_FREE(result);
5516 
5517  float64_t model_sizes[2];
5518  model_sizes[0]=(float64_t) seq_length;
5519  model_sizes[1]=(float64_t) num_symbols;
5520  set_vector(model_sizes, 2);
5521 
5522  return true;
5523 }
5524 
5525 bool CSGInterface::cmd_convergence_criteria()
5526 {
5527  if (m_nrhs<3 || !create_return_values(0))
5528  return false;
5529 
5530  int32_t num_iterations=get_int_from_int_or_str();
5531  float64_t epsilon=get_real_from_real_or_str();
5532 
5533  return ui_hmm->convergence_criteria(num_iterations, epsilon);
5534 }
5535 
5536 bool CSGInterface::cmd_normalize()
5537 {
5538  if (m_nrhs<2 || !create_return_values(0))
5539  return false;
5540 
5541  bool keep_dead_states=get_bool_from_bool_or_str();
5542 
5543  return ui_hmm->normalize(keep_dead_states);
5544 }
5545 
5546 bool CSGInterface::cmd_add_states()
5547 {
5548  if (m_nrhs<3 || !create_return_values(0))
5549  return false;
5550 
5551  int32_t num_states=get_int_from_int_or_str();
5552  float64_t value=get_real_from_real_or_str();
5553 
5554  return ui_hmm->add_states(num_states, value);
5555 }
5556 
5557 bool CSGInterface::cmd_permutation_entropy()
5558 {
5559  if (m_nrhs<3 || !create_return_values(0))
5560  return false;
5561 
5562  int32_t width=get_int_from_int_or_str();
5563  int32_t seq_num=get_int_from_int_or_str();
5564 
5565  return ui_hmm->permutation_entropy(width, seq_num);
5566 }
5567 
5568 bool CSGInterface::cmd_relative_entropy()
5569 {
5570  if (m_nrhs!=1 || !create_return_values(1))
5571  return false;
5572 
5573  float64_t* entropy=NULL;
5574  int32_t len=0;
5575  bool success=ui_hmm->relative_entropy(entropy, len);
5576  if (!success)
5577  return false;
5578 
5579  set_vector(entropy, len);
5580 
5581  SG_FREE(entropy);
5582  return true;
5583 }
5584 
5585 bool CSGInterface::cmd_entropy()
5586 {
5587  if (m_nrhs!=1 || !create_return_values(1))
5588  return false;
5589 
5590  float64_t* entropy=NULL;
5591  int32_t len=0;
5592  bool success=ui_hmm->entropy(entropy, len);
5593  if (!success)
5594  return false;
5595 
5596  set_vector(entropy, len);
5597 
5598  SG_FREE(entropy);
5599  return true;
5600 }
5601 
5602 bool CSGInterface::cmd_hmm_classify()
5603 {
5604  return do_hmm_classify(false, false);
5605 }
5606 
5607 bool CSGInterface::cmd_one_class_hmm_classify()
5608 {
5609  return do_hmm_classify(false, true);
5610 }
5611 
5612 bool CSGInterface::cmd_one_class_linear_hmm_classify()
5613 {
5614  return do_hmm_classify(true, true);
5615 }
5616 
5617 bool CSGInterface::do_hmm_classify(bool linear, bool one_class)
5618 {
5619  if (m_nrhs>1 || !create_return_values(1))
5620  return false;
5621 
5622  CFeatures* feat=ui_features->get_test_features();
5623  if (!feat)
5624  return false;
5625 
5626  int32_t num_vec=feat->get_num_vectors();
5627  CRegressionLabels* labels=NULL;
5628 
5629  if (linear) // must be one_class as well
5630  {
5631  labels=ui_hmm->linear_one_class_classify();
5632  }
5633  else
5634  {
5635  if (one_class)
5636  labels=ui_hmm->one_class_classify();
5637  else
5638  labels=ui_hmm->classify();
5639  }
5640  if (!labels)
5641  return false;
5642 
5643  float64_t* result=SG_MALLOC(float64_t, num_vec);
5644  for (int32_t i=0; i<num_vec; i++)
5645  result[i]=labels->get_label(i);
5646  SG_UNREF(labels);
5647 
5648  set_vector(result, num_vec);
5649  SG_FREE(result);
5650 
5651  return true;
5652 }
5653 
5654 bool CSGInterface::cmd_one_class_hmm_classify_example()
5655 {
5656  return do_hmm_classify_example(true);
5657 }
5658 
5659 bool CSGInterface::cmd_hmm_classify_example()
5660 {
5661  return do_hmm_classify_example(false);
5662 }
5663 
5664 bool CSGInterface::do_hmm_classify_example(bool one_class)
5665 {
5666  if (m_nrhs!=2 || !create_return_values(1))
5667  return false;
5668 
5669  int32_t idx=get_int();
5670  float64_t result=0;
5671 
5672  if (one_class)
5673  result=ui_hmm->one_class_classify_example(idx);
5674  else
5675  result=ui_hmm->classify_example(idx);
5676 
5677  set_real(result);
5678 
5679  return true;
5680 }
5681 
5682 bool CSGInterface::cmd_output_hmm()
5683 {
5684  if (m_nrhs!=1 || !create_return_values(0))
5685  return false;
5686 
5687  return ui_hmm->output_hmm();
5688 }
5689 
5690 bool CSGInterface::cmd_output_hmm_defined()
5691 {
5692  if (m_nrhs!=1 || !create_return_values(0))
5693  return false;
5694 
5695  return ui_hmm->output_hmm_defined();
5696 }
5697 
5698 bool CSGInterface::cmd_hmm_likelihood()
5699 {
5700  if (m_nrhs!=1 || !create_return_values(1))
5701  return false;
5702 
5703  CHMM* h=ui_hmm->get_current();
5704  if (!h)
5705  SG_ERROR("No HMM.\n")
5706 
5707  float64_t likelihood=h->model_probability();
5708  set_real(likelihood);
5709 
5710  return true;
5711 }
5712 
5713 bool CSGInterface::cmd_likelihood()
5714 {
5715  if (m_nrhs!=1 || !create_return_values(0))
5716  return false;
5717 
5718  return ui_hmm->likelihood();
5719 }
5720 
5721 bool CSGInterface::cmd_save_likelihood()
5722 {
5723  if (m_nrhs<2 || !create_return_values(0))
5724  return false;
5725 
5726  int32_t len=0;
5727  char* filename=get_str_from_str_or_direct(len);
5728 
5729  bool is_binary=false;
5730  if (m_nrhs==3)
5731  is_binary=get_bool_from_bool_or_str();
5732 
5733  bool success=ui_hmm->save_likelihood(filename, is_binary);
5734 
5735  SG_FREE(filename);
5736  return success;
5737 }
5738 
5739 bool CSGInterface::cmd_get_viterbi_path()
5740 {
5741  if (m_nrhs!=2 || !create_return_values(2))
5742  return false;
5743 
5744  int32_t dim=get_int();
5745  SG_DEBUG("dim: %f\n", dim)
5746 
5747  CHMM* h=ui_hmm->get_current();
5748  if (!h)
5749  return false;
5750 
5751  CFeatures* feat=ui_features->get_test_features();
5752  if (!feat || (feat->get_feature_class()!=C_STRING) ||
5753  (feat->get_feature_type()!=F_WORD))
5754  return false;
5755 
5757 
5758  int32_t num_feat=0;
5759  bool free_vec;
5760  uint16_t* vec=((CStringFeatures<uint16_t>*) feat)->get_feature_vector(dim, num_feat, free_vec);
5761  if (!vec || num_feat<=0)
5762  {
5763  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5764  return false;
5765  }
5766  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5767 
5768  SG_DEBUG("computing viterbi path for vector %d (length %d)\n", dim, num_feat)
5769  float64_t likelihood=0;
5770  T_STATES* path=h->get_path(dim, likelihood);
5771 
5772  set_vector(path, num_feat);
5773  SG_FREE(path);
5774  set_real(likelihood);
5775 
5776  return true;
5777 }
5778 
5779 bool CSGInterface::cmd_viterbi_train()
5780 {
5781  if (m_nrhs!=1 || !create_return_values(0))
5782  return false;
5783 
5784  return ui_hmm->viterbi_train();
5785 }
5786 
5787 bool CSGInterface::cmd_viterbi_train_defined()
5788 {
5789  if (m_nrhs!=1 || !create_return_values(0))
5790  return false;
5791 
5792  return ui_hmm->viterbi_train_defined();
5793 }
5794 
5795 bool CSGInterface::cmd_baum_welch_train()
5796 {
5797  if (m_nrhs!=1 || !create_return_values(0))
5798  return false;
5799 
5800  return ui_hmm->baum_welch_train();
5801 }
5802 
5803 bool CSGInterface::cmd_baum_welch_train_defined()
5804 {
5805  if (m_nrhs!=1 || !create_return_values(0))
5806  return false;
5807 
5808  return ui_hmm->baum_welch_train_defined();
5809 }
5810 
5811 
5812 bool CSGInterface::cmd_baum_welch_trans_train()
5813 {
5814  if (m_nrhs!=1 || !create_return_values(0))
5815  return false;
5816 
5817  return ui_hmm->baum_welch_trans_train();
5818 }
5819 
5820 bool CSGInterface::cmd_linear_train()
5821 {
5822  if (m_nrhs<1 || !create_return_values(0))
5823  return false;
5824 
5825  if (m_nrhs==2)
5826  {
5827  int32_t len=0;
5828  char* align=get_str_from_str_or_direct(len);
5829 
5830  bool success=ui_hmm->linear_train(align[0]);
5831 
5832  SG_FREE(align);
5833  return success;
5834  }
5835  else
5836  return ui_hmm->linear_train();
5837 }
5838 
5839 bool CSGInterface::cmd_save_path()
5840 {
5841  if (m_nrhs<2 || !create_return_values(0))
5842  return false;
5843 
5844  int32_t len=0;
5845  char* filename=get_str_from_str_or_direct(len);
5846 
5847  bool is_binary=false;
5848  if (m_nrhs==3)
5849  is_binary=get_bool_from_bool_or_str();
5850 
5851  bool success=ui_hmm->save_path(filename, is_binary);
5852 
5853  SG_FREE(filename);
5854  return success;
5855 }
5856 
5857 bool CSGInterface::cmd_append_hmm()
5858 {
5859  if (m_nrhs!=5 || !create_return_values(0))
5860  return false;
5861 
5862  CHMM* old_h=ui_hmm->get_current();
5863  if (!old_h)
5864  SG_ERROR("No current HMM set.\n")
5865 
5866  float64_t* p=NULL;
5867  int32_t N_p=0;
5868  get_vector(p, N_p);
5869 
5870  float64_t* q=NULL;
5871  int32_t N_q=0;
5872  get_vector(q, N_q);
5873 
5874  float64_t* a=NULL;
5875  int32_t M_a=0;
5876  int32_t N_a=0;
5877  get_matrix(a, M_a, N_a);
5878  int32_t N=N_a;
5879 
5880  float64_t* b=NULL;
5881  int32_t M_b=0;
5882  int32_t N_b=0;
5883  get_matrix(b, M_b, N_b);
5884  int32_t M=N_b;
5885 
5886  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5887  {
5888  SG_ERROR("Model matrices not matching in size.\n"
5889  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5890  N_p, N_q, N_a, M_a, N_b, M_b);
5891  }
5892 
5893  CHMM* h=new CHMM(N, M, NULL, ui_hmm->get_pseudo());
5894  int32_t i,j;
5895 
5896  for (i=0; i<N; i++)
5897  {
5898  h->set_p(i, p[i]);
5899  h->set_q(i, q[i]);
5900  }
5901 
5902  for (i=0; i<N; i++)
5903  for (j=0; j<N; j++)
5904  h->set_a(i,j, a[i+j*N]);
5905 
5906  for (i=0; i<N; i++)
5907  for (j=0; j<M; j++)
5908  h->set_b(i,j, b[i+j*N]);
5909 
5910  old_h->append_model(h);
5911  SG_UNREF(h);
5912 
5913  return true;
5914 }
5915 
5916 bool CSGInterface::cmd_append_model()
5917 {
5918  if (m_nrhs<2 || !create_return_values(0))
5919  return false;
5920  if (m_nrhs>2 && m_nrhs!=4)
5921  return false;
5922 
5923  int32_t len=0;
5924  char* filename=get_str_from_str_or_direct(len);
5925  int32_t base1=-1;
5926  int32_t base2=-1;
5927  if (m_nrhs>2)
5928  {
5929  base1=get_int_from_int_or_str();
5930  base2=get_int_from_int_or_str();
5931  }
5932 
5933  bool success=ui_hmm->append_model(filename, base1, base2);
5934 
5935  SG_FREE(filename);
5936  return success;
5937 }
5938 
5939 bool CSGInterface::cmd_new_hmm()
5940 {
5941  if (m_nrhs!=3 || !create_return_values(0))
5942  return false;
5943 
5944  int32_t n=get_int_from_int_or_str();
5945  int32_t m=get_int_from_int_or_str();
5946 
5947  return ui_hmm->new_hmm(n, m);
5948 }
5949 
5950 bool CSGInterface::cmd_load_hmm()
5951 {
5952  if (m_nrhs!=2 || !create_return_values(0))
5953  return false;
5954 
5955  int32_t len=0;
5956  char* filename=get_str_from_str_or_direct(len);
5957 
5958  bool success=ui_hmm->load(filename);
5959 
5960  SG_FREE(filename);
5961  return success;
5962 }
5963 
5964 bool CSGInterface::cmd_save_hmm()
5965 {
5966  if (m_nrhs<2 || !create_return_values(0))
5967  return false;
5968 
5969  int32_t len=0;
5970  char* filename=get_str_from_str_or_direct(len);
5971 
5972  bool is_binary=false;
5973  if (m_nrhs==3)
5974  is_binary=get_bool_from_bool_or_str();
5975 
5976  bool success=ui_hmm->save(filename, is_binary);
5977 
5978  SG_FREE(filename);
5979  return success;
5980 }
5981 
5982 bool CSGInterface::cmd_set_hmm()
5983 {
5984  if (m_nrhs!=5 || !create_return_values(0))
5985  return false;
5986 
5987  float64_t* p=NULL;
5988  int32_t N_p=0;
5989  get_vector(p, N_p);
5990 
5991  float64_t* q=NULL;
5992  int32_t N_q=0;
5993  get_vector(q, N_q);
5994 
5995  float64_t* a=NULL;
5996  int32_t M_a=0;
5997  int32_t N_a=0;
5998  get_matrix(a, M_a, N_a);
5999  int32_t N=N_a;
6000 
6001  float64_t* b=NULL;
6002  int32_t M_b=0;
6003  int32_t N_b=0;
6004  get_matrix(b, M_b, N_b);
6005  int32_t M=N_b;
6006 
6007  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
6008  {
6009  SG_ERROR("Model matrices not matching in size.\n"
6010  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
6011  N_p, N_q, N_a, M_a, N_b, M_b);
6012  }
6013 
6014  CHMM* current=ui_hmm->get_current();
6015  if (!current)
6016  SG_ERROR("Need a previously created HMM.\n")
6017 
6018  int32_t i,j;
6019 
6020  for (i=0; i<N; i++)
6021  {
6022  current->set_p(i, p[i]);
6023  current->set_q(i, q[i]);
6024  }
6025 
6026  for (i=0; i<N; i++)
6027  for (j=0; j<N; j++)
6028  current->set_a(i,j, a[i+j*N]);
6029 
6030  for (i=0; i<N; i++)
6031  for (j=0; j<M; j++)
6032  current->set_b(i,j, b[i+j*N]);
6033 
6034  CStringFeatures<uint16_t>* sf = ((CStringFeatures<uint16_t>*) (ui_features->get_train_features()));
6035  current->set_observations(sf);
6036 
6037  return true;
6038 }
6039 
6040 bool CSGInterface::cmd_set_hmm_as()
6041 {
6042  if (m_nrhs!=2 || !create_return_values(0))
6043  return false;
6044 
6045  int32_t len=0;
6046  char* target=get_str_from_str_or_direct(len);
6047 
6048  bool success=ui_hmm->set_hmm_as(target);
6049 
6050  SG_FREE(target);
6051  return success;
6052 }
6053 
6054 bool CSGInterface::cmd_set_chop()
6055 {
6056  if (m_nrhs!=2 || !create_return_values(0))
6057  return false;
6058 
6059  float64_t value=get_real_from_real_or_str();
6060  return ui_hmm->chop(value);
6061 }
6062 
6063 bool CSGInterface::cmd_set_pseudo()
6064 {
6065  if (m_nrhs!=2 || !create_return_values(0))
6066  return false;
6067 
6068  float64_t value=get_real_from_real_or_str();
6069  return ui_hmm->set_pseudo(value);
6070 }
6071 
6072 bool CSGInterface::cmd_load_definitions()
6073 {
6074  if (m_nrhs<2 || !create_return_values(0))
6075  return false;
6076 
6077  int32_t len=0;
6078  char* filename=get_str_from_str_or_direct(len);
6079 
6080  bool do_init=false;
6081  if (m_nrhs==3)
6082  do_init=get_bool_from_bool_or_str();
6083 
6084  bool success=ui_hmm->load_definitions(filename, do_init);
6085 
6086  SG_FREE(filename);
6087  return success;
6088 }
6089 
6090 bool CSGInterface::cmd_get_hmm()
6091 {
6092  if (m_nrhs!=1 || !create_return_values(4))
6093  return false;
6094 
6095  CHMM* h=ui_hmm->get_current();
6096  if (!h)
6097  return false;
6098 
6099  int32_t N=h->get_N();
6100  int32_t M=h->get_M();
6101  int32_t i=0;
6102  int32_t j=0;
6103  float64_t* p=SG_MALLOC(float64_t, N);
6104  float64_t* q=SG_MALLOC(float64_t, N);
6105 
6106  for (i=0; i<N; i++)
6107  {
6108  p[i]=h->get_p(i);
6109  q[i]=h->get_q(i);
6110  }
6111 
6112  set_vector(p, N);
6113  SG_FREE(p);
6114  set_vector(q, N);
6115  SG_FREE(q);
6116 
6117  float64_t* a=SG_MALLOC(float64_t, N*N);
6118  for (i=0; i<N; i++)
6119  for (j=0; j<N; j++)
6120  a[i+j*N]=h->get_a(i, j);
6121  set_matrix(a, N, N);
6122  SG_FREE(a);
6123 
6124  float64_t* b=SG_MALLOC(float64_t, N*M);
6125  for (i=0; i<N; i++)
6126  for (j=0; j<M; j++)
6127  b[i+j*N]=h->get_b(i, j);
6128  set_matrix(b, N, M);
6129  SG_FREE(b);
6130 
6131  return true;
6132 }
6133 
6134 bool CSGInterface::cmd_best_path()
6135 {
6136  if (m_nrhs!=3 || !create_return_values(0))
6137  return false;
6138 
6139  int32_t from=get_int_from_int_or_str();
6140  int32_t to=get_int_from_int_or_str();
6141 
6142  return ui_hmm->best_path(from, to);
6143 }
6144 
6145 bool CSGInterface::cmd_best_path_2struct()
6146 {
6147  if (m_nrhs!=12 || !create_return_values(3))
6148  return false;
6149 
6150  SG_ERROR("Sorry, this parameter list is awful!\n")
6151 
6152  return true;
6153 }
6154 
6155 void CSGInterface::get_vector(bool*& vector, int32_t& len)
6156 {
6157  int32_t* int_vector;
6158  get_vector(int_vector, len);
6159 
6160  ASSERT(len>0)
6161  vector= SG_MALLOC(bool, len);
6162 
6163  for (int32_t i=0; i<len; i++)
6164  vector[i]= (int_vector[i]!=0);
6165 
6166  SG_FREE(int_vector);
6167 }
6168 
6169 void CSGInterface::set_vector(const bool* vector, int32_t len)
6170 {
6171  int32_t* int_vector = SG_MALLOC(int32_t, len);
6172  for (int32_t i=0;i<len;i++)
6173  {
6174  if (vector[i])
6175  int_vector[i]=1;
6176  else
6177  int_vector[i]=0;
6178  }
6179  set_vector(int_vector,len);
6180  SG_FREE(int_vector);
6181 }
6182 
6183 bool CSGInterface::cmd_set_plif_struct()
6184 {
6185  // ARG 2
6186  int32_t Nid=0;
6187  int32_t* ids;
6188  get_vector(ids,Nid);
6189 
6190  // ARG 3
6191  int32_t Nname=0;
6192  int32_t Mname=0;
6193  SGString<char>* names;
6194  get_string_list(names, Nname,Mname);
6195 
6196  // ARG 4
6197  int32_t Nlimits=0;
6198  int32_t Mlimits=0;
6199  float64_t* all_limits;
6200  get_matrix(all_limits, Mlimits, Nlimits);
6201 
6202  // ARG 5
6203  int32_t Npenalties=0;
6204  int32_t Mpenalties=0;
6205  float64_t* all_penalties;
6206  get_matrix(all_penalties, Mpenalties, Npenalties);
6207 
6208  // ARG 6
6209  int32_t Ntransform=0;
6210  int32_t Mtransform=0;
6211  SGString<char>* all_transform;
6212  get_string_list(all_transform, Ntransform, Mtransform);
6213 
6214  // ARG 7
6215  int32_t Nmin=0;
6216  float64_t* min_values;
6217  get_vector(min_values,Nmin);
6218 
6219  // ARG 8
6220  int32_t Nmax=0;
6221  float64_t* max_values;
6222  get_vector(max_values,Nmax);
6223 
6224  // ARG 9
6225  int32_t Ncache=0;
6226  bool* all_use_cache;
6227  get_vector(all_use_cache,Ncache);
6228 
6229  // ARG 10
6230  int32_t Nsvm=0;
6231  int32_t* all_use_svm;
6232  get_vector(all_use_svm,Nsvm);
6233 
6234  // ARG 11
6235  int32_t Ncalc=0;
6236  bool* all_do_calc;
6237  get_vector(all_do_calc,Ncalc);
6238 
6239  if (Ncalc!=Nsvm)
6240  SG_ERROR("Ncalc!=Nsvm, Ncalc:%i, Nsvm:%i\n",Ncalc,Nsvm)
6241  if (Ncalc!=Ncache)
6242  SG_ERROR("Ncalc!=Ncache, Ncalc:%i, Ncache:%i\n",Ncalc,Ncache)
6243  if (Ncalc!=Ntransform)
6244  SG_ERROR("Ncalc!=Ntransform, Ncalc:%i, Ntransform:%i\n",Ncalc,Ntransform)
6245  if (Ncalc!=Nmin)
6246  SG_ERROR("Ncalc!=Nmin, Ncalc:%i, Nmin:%i\n",Ncalc,Nmin)
6247  if (Ncalc!=Nmax)
6248  SG_ERROR("Ncalc!=Nmax, Ncalc:%i, Nmax:%i\n",Ncalc,Nmax)
6249  if (Ncalc!=Npenalties)
6250  SG_ERROR("Ncalc!=Npenalties, Ncalc:%i, Npenalties:%i\n",Ncalc,Npenalties)
6251  if (Ncalc!=Nlimits)
6252  SG_ERROR("Ncalc!=Nlimits, Ncalc:%i, Nlimits:%i\n",Ncalc,Nlimits)
6253  if (Ncalc!=Nname)
6254  SG_ERROR("Ncalc!=Nname, Ncalc:%i, Nname:%i\n",Ncalc,Nname)
6255  if (Ncalc!=Nid)
6256  SG_ERROR("Ncalc!=Nid, Ncalc:%i, Nid:%i\n",Ncalc,Nid)
6257  if (Mlimits!=Mpenalties)
6258  SG_ERROR("Mlimits!=Mpenalties, Mlimits:%i, Mpenalties:%i\n",Mlimits,Mpenalties)
6259 
6260  int32_t N = Ncalc;
6261  int32_t M = Mlimits;
6262  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6263  pm->create_plifs(N, M);
6264  pm->set_plif_ids(SGVector<int32_t>(ids, N));
6265  pm->set_plif_min_values(SGVector<float64_t>(min_values, N));
6266  pm->set_plif_max_values(SGVector<float64_t>(max_values, N));
6267  pm->set_plif_use_cache(SGVector<bool>(all_use_cache, N));
6268  pm->set_plif_use_svm(SGVector<int32_t>(all_use_svm, N));
6269  pm->set_plif_limits(SGMatrix<float64_t>(all_limits, N, M));
6270  pm->set_plif_penalties(SGMatrix<float64_t>(all_penalties, N, M));
6271  pm->set_plif_names(names, N);
6272  pm->set_plif_transform_type(all_transform, N);
6273 
6274  SG_FREE(names);
6275  SG_FREE(all_transform);
6276  SG_FREE(all_do_calc);
6277 
6278  return true;
6279 }
6280 
6281 bool CSGInterface::cmd_get_plif_struct()
6282 {
6283  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6284  CPlif** PEN = pm->get_PEN();
6285  int32_t N = pm->get_num_plifs();
6286  int32_t M = pm->get_num_limits();
6287 
6288 
6289  int32_t* ids = SG_MALLOC(int32_t, N);
6290  float64_t* max_values = SG_MALLOC(float64_t, N);
6291  float64_t* min_values = SG_MALLOC(float64_t, N);
6292  SGString<char>* names = SG_MALLOC(SGString<char>, N);
6293  SGString<char>* all_transform = SG_MALLOC(SGString<char>, N);
6294  float64_t* all_limits = SG_MALLOC(float64_t, N*M);
6295  float64_t* all_penalties = SG_MALLOC(float64_t, N*M);
6296  bool* all_use_cache = SG_MALLOC(bool, N);
6297  int32_t* all_use_svm = SG_MALLOC(int32_t, N);
6298  bool* all_do_calc = SG_MALLOC(bool, N);
6299  for (int32_t i=0;i<N;i++)
6300  {
6301  ids[i]=PEN[i]->get_id();
6302  names[i].string = PEN[i]->get_plif_name();
6303  names[i].slen = strlen(PEN[i]->get_plif_name());
6304  SGVector<float64_t> limits = PEN[i]->get_plif_limits();
6305  SGVector<float64_t> penalties = PEN[i]->get_plif_penalties();
6306  for (int32_t j=0;j<M;j++)
6307  {
6308  all_limits[i*M+j]=limits[j];
6309  all_penalties[i*M+j]=penalties[j];
6310  }
6311  all_transform[i].string = (char*) PEN[i]->get_transform_type();
6312  all_transform[i].slen = strlen(PEN[i]->get_transform_type());
6313  min_values[i]=PEN[i]->get_min_value();
6314  max_values[i]=PEN[i]->get_max_value();
6315  all_use_cache[i]=PEN[i]->get_use_cache();
6316  all_use_svm[i]=PEN[i]->get_use_svm();
6317  all_do_calc[i]=PEN[i]->get_do_calc();
6318 
6319  }
6320  set_vector(ids,N);
6321  set_string_list(names, N);
6322  set_matrix(all_limits, M, N);
6323  set_matrix(all_penalties, M, N);
6324  set_string_list(all_transform, N);
6325  set_vector(min_values,N);
6326  set_vector(max_values,N);
6327  set_vector(all_use_cache,N);
6328  set_vector(all_use_svm,N);
6329  set_vector(all_do_calc,N);
6330 
6331  SG_FREE(ids);
6332  SG_FREE(max_values);
6333  SG_FREE(min_values);
6334  SG_FREE(names);
6335  SG_FREE(all_transform);
6336  SG_FREE(all_limits);
6337  SG_FREE(all_penalties);
6338  SG_FREE(all_use_cache);
6339  SG_FREE(all_use_svm);
6340  SG_FREE(all_do_calc);
6341 
6342  return true;
6343 }
6344 /*bool CSGInterface::cmd_signals_set_model()
6345 {
6346  // ARG 1
6347  int32_t len=0;
6348  char* filename;
6349  filename = get_string(len);
6350 
6351  CTrainPredMaster* tpm = new CTrainPredMaster(ui_kernel);
6352 
6353  tpm->read_models_from_file(filename);
6354 
6355  return true;
6356  }*/
6357 bool CSGInterface::cmd_signals_set_positions()
6358 {
6359  return true;
6360 }
6361 bool CSGInterface::cmd_signals_set_labels()
6362 {
6363  return true;
6364 }
6365 bool CSGInterface::cmd_signals_set_split()
6366 {
6367  return true;
6368 }
6369 bool CSGInterface::cmd_signals_set_train_mask()
6370 {
6371  return true;
6372 }
6373 bool CSGInterface::cmd_signals_add_feature()
6374 {
6375  return true;
6376 }
6377 bool CSGInterface::cmd_signals_add_kernel()
6378 {
6379  return true;
6380 }
6381 bool CSGInterface::cmd_signals_run()
6382 {
6383  return true;
6384 }
6385 
6386 bool CSGInterface::cmd_init_dyn_prog()
6387 {
6388  //ARG 1
6389  int32_t num_svms=get_int();
6390 
6391  CDynProg* h=new CDynProg(num_svms);
6392  ui_structure->set_dyn_prog(h);
6393  return true;
6394 }
6395 
6396 bool CSGInterface::cmd_clean_up_dyn_prog()
6397 {
6398  return ui_structure->cleanup();
6399 }
6400 
6401 bool CSGInterface::cmd_set_model()
6402 {
6403 
6404  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6405 
6406  CDynProg* h = ui_structure->get_dyn_prog();
6407  int32_t num_svms = h->get_num_svms();
6408  //CDynProg* h=new CDynProg(Nweights/* = num_svms */);
6409 
6410  //ARG 1
6411  // transition pointers
6412  // link transitions to length, content, frame (and tiling)
6413  // plifs (#states x #states x 3 or 4)
6414  int32_t numDim=0;
6415  int32_t* Dim=0;
6416  float64_t* penalties_array=NULL;
6417  get_ndarray(penalties_array,Dim,numDim);
6418  ASSERT(numDim==3)
6419  ASSERT(Dim[0]==Dim[1])
6420 
6421  if (!pm->compute_plif_matrix(SGNDArray<float64_t>(penalties_array, Dim, numDim, false)))
6422  SG_ERROR("error computing plif matrix\n")
6423  ui_structure->set_num_states(Dim[0]);
6424  SG_FREE(penalties_array);
6425 
6426  // ARG 2
6427  // bool-> determines if orf information should be used
6428  bool use_orf = get_bool();
6429  ui_structure->set_use_orf(use_orf);
6430 
6431  // ARG 3
6432  // determines for which contents which orf should be used (#contents x 2)
6433  int32_t Nmod=0;
6434  int32_t Mmod=0;
6435  int32_t* mod_words;
6436  get_matrix(mod_words, Nmod,Mmod);
6437  if (Nmod != num_svms)
6438  SG_ERROR("should be equal: Nmod: %i, num_svms: %i\n",Nmod,num_svms)
6439  ASSERT(Mmod == 2)
6440  h->init_mod_words_array(SGMatrix<int32_t>(mod_words, Nmod, Mmod));
6441 
6442  // ARG 4
6443  // links: states -> signal plifs (#states x 2)
6444  int32_t num_states=0;
6445  int32_t feat_dim3=0;
6446  int32_t* state_signals;
6447  get_matrix(state_signals,num_states,feat_dim3);
6448  ASSERT(num_states==Dim[0])
6449  pm->compute_signal_plifs(SGMatrix<int32_t>(state_signals, feat_dim3, num_states));
6450 
6451 
6452  // ARG 5
6453  // ORF info (#states x 2)
6454  int32_t Norf=0;
6455  int32_t Morf=0;
6456  int32_t* orf_info;
6457  get_matrix(orf_info,Norf,Morf);
6458  ASSERT(Norf==num_states)
6459  ASSERT(Morf==2)
6460 
6461  ui_structure->set_orf_info(orf_info, Norf, Morf);
6462  h->set_orf_info(SGMatrix<int32_t>(orf_info, Norf, Morf));
6463 
6464  h->set_num_states(num_states) ;
6465 
6466  return true;
6467 }
6468 
6469 bool CSGInterface::cmd_precompute_content_svms()
6470 {
6471 
6472  // ARG 1
6473  int32_t seq_len=0;
6474  char* seq;
6475  seq = get_string(seq_len);
6476 
6477  // ARG 2
6478  // all feature positions
6479  int32_t Npos=0;
6480  int32_t* all_pos;
6481  get_vector(all_pos, Npos);
6482 
6483  //ARG 3
6484  // content svm weights
6485  int32_t Nweights=0;
6486  int32_t num_svms=0;
6487  float64_t* weights;
6488  get_matrix(weights, Nweights, num_svms);
6489  if (Nweights!=5440)
6490  SG_PRINT("Dimension mismatch: got %i, expect %i\n", Nweights, 5440)
6491  ui_structure->set_content_svm_weights(weights, Nweights, num_svms);
6492 
6493  CDynProg* h = ui_structure->get_dyn_prog();
6494  if (!h)
6495  SG_ERROR("no DynProg object found, use init_dyn_prog first\n")
6496 
6497 
6498  //float64_t* weights = ui_structure->get_content_svm_weights();
6499  //int32_t Mweights = h->get_num_svms();
6500  //int32_t Nweights = ui_structure->get_num_svm_weights();
6501  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6502  h->set_gene_string(SGVector<char>(seq, seq_len));
6503  h->create_word_string();
6505  h->init_content_svm_value_array(num_svms);
6506  h->set_dict_weights(SGMatrix<float64_t>(weights, Nweights, num_svms));
6508  SG_DEBUG("precompute_content_svms done\n")
6509  return true;
6510 }
6511 
6512 bool CSGInterface::cmd_get_lin_feat()
6513 {
6514  CDynProg* h = ui_structure->get_dyn_prog();
6515  if (!h)
6516  SG_ERROR("no DynProg object found, use set_model first\n")
6517 
6518 
6519  int32_t dim1, dim2 = 0;
6520  float64_t* lin_feat = h->get_lin_feat(dim1, dim2);
6521 
6522  set_matrix(lin_feat, dim1, dim2);
6523 
6524  return true;
6525 }
6526 bool CSGInterface::cmd_set_lin_feat()
6527 {
6528  // ARG 1
6529  int32_t Nseq=0;
6530  char* seq;
6531  seq = get_string(Nseq);
6532 
6533  // ARG 2
6534  // all feature positions
6535  int32_t Npos=0;
6536  int32_t* all_pos;
6537  get_vector(all_pos, Npos);
6538 
6539  //ARG 3
6540  //
6541  int32_t num_svms, seq_len;
6542  float64_t* lin_feat=NULL;
6543  get_matrix(lin_feat, num_svms, seq_len);
6544 
6545  if (Npos!=seq_len)
6546  {
6547  SG_ERROR("Dimension mismatch: got %i positions and (%ix%i) values\n", Npos, num_svms, seq_len)
6548 
6549  SG_FREE(lin_feat);
6550  SG_FREE(seq);
6551  SG_FREE(all_pos);
6552 
6553  return false ;
6554  }
6555 
6556  CDynProg* h = ui_structure->get_dyn_prog();
6557  if (!h)
6558  SG_ERROR("no DynProg object found, use set_model first\n")
6559 
6560  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6561  h->set_gene_string(SGVector<char>(seq, Nseq));
6563  h->init_content_svm_value_array(num_svms);
6564  h->set_lin_feat(lin_feat, num_svms, seq_len);
6565 
6566  SG_FREE(lin_feat);
6567 
6568  return true;
6569 }
6570 bool CSGInterface::cmd_long_transition_settings()
6571 {
6572  bool use_long_transitions = get_bool();
6573  int32_t threshold = get_int();
6574  int32_t max_len = get_int();
6575 
6576  CDynProg* h = ui_structure->get_dyn_prog();
6577  if (!h)
6578  SG_ERROR("no DynProg object found, use set_model first\n")
6579 
6580  h->long_transition_settings(use_long_transitions, threshold, max_len);
6581 
6582  return true;
6583 }
6584 bool CSGInterface::cmd_set_feature_matrix()
6585 {
6586  int32_t num_states = ui_structure->get_num_states();
6587 
6588  //ARG 1
6589  // feature matrix (#states x #feature_positions x max_num_signals)
6590  int32_t* Dims=0;
6591  int32_t numDims=0;
6592  float64_t* features = NULL;
6593  get_ndarray(features, Dims, numDims);
6594 
6595  if (numDims!=3)
6596  SG_ERROR("expected a 3 dimensional array, got %i dimensions\n", numDims)
6597  if (Dims[0]!=num_states)
6598  SG_ERROR("number of rows (%i) not equal number of states (%i)\n",Dims[0], num_states)
6599  ASSERT(ui_structure->set_feature_matrix(features, Dims))
6600 
6601  ASSERT(ui_structure->set_feature_dims(Dims))
6602 
6603  SG_FREE(features);
6604  SG_FREE(Dims);
6605 
6606  return true;
6607 }
6608 bool CSGInterface::cmd_set_feature_matrix_sparse()
6609 {
6610  int32_t num_pos = ui_structure->get_num_positions();
6611  int32_t num_states = ui_structure->get_num_states();
6612 
6613  //ARG 1
6614  // feature matrix (#states x #feature_positions x max_num_signals)
6615  int32_t dim11, dim12 ;
6616  SGSparseVector<float64_t> *features1=NULL ;
6617  get_sparse_matrix(features1, dim11, dim12);
6618 
6619  int32_t dim21, dim22 ;
6620  SGSparseVector<float64_t> *features2=NULL ;
6621  get_sparse_matrix(features2, dim21, dim22);
6622 
6623  ASSERT(dim11==dim21)
6624  ASSERT(dim12==dim22)
6625 
6626  int32_t *Dims = SG_MALLOC(int32_t, 3);
6627  Dims[0]=dim11 ;
6628  Dims[1]=dim12 ;
6629  Dims[2]=2 ;
6630 
6631  ASSERT(Dims[0]==num_states)
6632  ASSERT(Dims[1]==num_pos)
6633 
6634  ASSERT(ui_structure->set_feature_matrix_sparse(features1, features2, Dims))
6635  ASSERT(ui_structure->set_feature_dims(Dims))
6636 
6637  SG_FREE(features1);
6638  SG_FREE(features2);
6639  SG_FREE(Dims);
6640 
6641  return true;
6642 }
6643 bool CSGInterface::cmd_init_intron_list()
6644 {
6645  //ARG1 start_positions
6646  int32_t Nstart_positions;
6647  int32_t* start_positions;
6648  get_vector(start_positions, Nstart_positions);
6649  //SG_PRINT("Nstart_positions:%i\n",Nstart_positions)
6650 
6651  //ARG2 end_positions
6652  int32_t Nend_positions;
6653  int32_t* end_positions;
6654  get_vector(end_positions, Nend_positions);
6655  //SG_PRINT("Nend_positions:%i\n",Nend_positions)
6656 
6657  //ARG3 quality
6658  int32_t Nquality;
6659  int32_t* quality;
6660  get_vector(quality, Nquality);
6661  //SG_PRINT("Nquality:%i\n",Nquality)
6662 
6663  //ARG4 all candidate positions
6664  int32_t Nall_pos;
6665  int32_t* all_pos;
6666  get_vector(all_pos, Nall_pos);
6667  //SG_PRINT("Nall_pos:%i\n",Nall_pos)
6668 
6669  ASSERT(Nquality==Nend_positions)
6670  ASSERT(Nend_positions==Nstart_positions)
6671 
6672  CIntronList* intron_list = new CIntronList();
6673 
6674  intron_list->init_list(all_pos, Nall_pos);
6675 
6676  intron_list->read_introns(start_positions, end_positions, quality, Nstart_positions);
6677 
6678  SG_FREE(start_positions);
6679  SG_FREE(end_positions);
6680  SG_FREE(quality);
6681  SG_FREE(all_pos);
6682 
6683  //int32_t test;
6684  //int32_t testq;
6685  //intron_list->get_coverage(&test, &testq, 15 ,16);
6686 
6687  //SG_PRINT("coverage: %i, quality: %i\n",test, testq)
6688 
6689  CDynProg* h = ui_structure->get_dyn_prog();
6690  if (!h)
6691  SG_ERROR("no DynProg object found, use set_model first\n")
6692 
6693  h->set_intron_list(intron_list, 2);
6694 
6695  return true;
6696 }
6697 bool CSGInterface::cmd_precompute_tiling_features()
6698 {
6699  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6700  CPlif** PEN = pm->get_PEN();
6701  CDynProg* h = ui_structure->get_dyn_prog();
6702 
6703  int32_t Nintensities=0;
6704  float64_t* intensities;
6705  get_vector(intensities, Nintensities);
6706 
6707  int32_t Nprobe_pos=0;
6708  int32_t* probe_pos;
6709  get_vector(probe_pos, Nprobe_pos);
6710  ASSERT(Nprobe_pos==Nintensities)
6711 
6712  int32_t Ntiling_plif_ids=0;
6713  int32_t* tiling_plif_ids;
6714  get_vector(tiling_plif_ids, Ntiling_plif_ids);
6715 
6716  h->init_tiling_data(probe_pos,intensities, Nprobe_pos);
6717  h->precompute_tiling_plifs(PEN, tiling_plif_ids, Ntiling_plif_ids);
6718  return true;
6719 }
6720 
6721 bool CSGInterface::cmd_best_path_trans()
6722 {
6723  CDynProg* h = ui_structure->get_dyn_prog();
6724 
6725  CSegmentLoss* seg_loss_obj = h->get_segment_loss_object();
6726 
6727  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6728 
6729  int32_t num_states = h->get_num_states();
6730  int32_t* feat_dims = ui_structure->get_feature_dims();
6731  float64_t* features = (ui_structure->get_feature_matrix(false));
6732  CSparseFeatures<float64_t>* features_sparse1 = (ui_structure->get_feature_matrix_sparse(0));
6733  CSparseFeatures<float64_t>* features_sparse2 = (ui_structure->get_feature_matrix_sparse(1));
6734  int32_t* orf_info = ui_structure->get_orf_info();
6735  bool use_orf = ui_structure->get_use_orf();
6736  int32_t Nplif = pm->get_num_plifs();
6737 
6738  // ARG 1
6739  // transitions from initial state (#states x 1)
6740  int32_t Np=0;
6741  float64_t* p;
6742  get_vector(p, Np);
6743  if (Np!=num_states)
6744  SG_ERROR("# transitions from initial state (%i) does not match # states (%i)\n", Np, num_states)
6745 
6746  // ARG 2
6747  // transitions to end state (#states x 1)
6748  int32_t Nq=0;
6749  float64_t* q;
6750  get_vector(q, Nq);
6751  if (Nq!=num_states)
6752  SG_ERROR("# transitions to end state (%i) does not match # states (%i)\n", Nq, num_states)
6753 
6754  // ARG 3
6755  // number of best paths
6756  int32_t Nnbest=0;
6757  int32_t* all_nbest;
6758  get_vector(all_nbest, Nnbest);
6759  int32_t nbest;
6760  int32_t nother = 0;
6761  if (Nnbest==2)
6762  {
6763  nbest =all_nbest[0];
6764  nother=all_nbest[1];
6765  }
6766  else
6767  nbest =all_nbest[0];
6768  SG_FREE(all_nbest);
6769 
6770  // ARG 4
6771  // segment path (2 x #feature_positions)
6772  // masking/weighting of loss for specific
6773  // regions of the true path
6774  int32_t Nseg_path=0;
6775  int32_t Mseg_path=0;
6776  float64_t* seg_path;
6777  get_matrix(seg_path, Nseg_path, Mseg_path);
6778 
6779  // ARG 5
6780  // links for transitions (#transitions x 4)
6781  int32_t Na_trans=0;
6782  int32_t num_a_trans=0;
6783  float64_t* a_trans;
6784  get_matrix(a_trans, num_a_trans, Na_trans);
6785 
6786  // ARG 6
6787  // loss matrix (#segment x 2*#segments)
6788  // one (#segment x #segments)-matrix for segment loss
6789  // and one for nucleotide loss
6790  int32_t Nloss=0;
6791  int32_t Mloss=0;
6792  float64_t* loss;
6793  get_matrix(loss, Nloss,Mloss);
6794 
6795  int32_t M = h->get_num_positions();
6796 
6798  // check input
6800  ASSERT(num_states==Nq)
6801 
6802  CPlif** PEN=pm->get_PEN();
6803  ASSERT(PEN)
6804 
6805  h->set_p_vector(SGVector<float64_t>(p, num_states));
6806  h->set_q_vector(SGVector<float64_t>(q, num_states));
6807 
6808  if (seg_path!=NULL)
6809  {
6810  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, Na_trans)) ;
6811  }
6812  else
6813  {
6814  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, 3)) ; // segment_id = 0
6815  }
6816 
6817  if (!h->check_svm_arrays())
6818  {
6819  SG_ERROR("svm arrays inconsistent\n")
6820  CPlif::delete_penalty_struct(PEN, Nplif) ;
6821  return false ;
6822  }
6823 
6824  SG_DEBUG("best_path_trans: M: %i, Mseg_path: %i\n", M, Mseg_path)
6825 
6826  h->set_observation_matrix(SGNDArray<float64_t>(features, feat_dims, 3, false));
6827 
6828  if (seg_path!=NULL)
6829  {
6830  h->best_path_set_segment_loss(SGMatrix<float64_t>(loss, Nloss, Mloss, false)) ;
6831  seg_loss_obj->set_segment_loss(loss, Nloss, Mloss);
6832  }
6833  else
6834  {
6835  float64_t zero2[2] = {0.0, 0.0} ;
6837  seg_loss_obj->set_segment_loss(zero2, 2, 1);
6838  }
6839  h->set_content_type_array(SGMatrix<float64_t>(seg_path,Nseg_path,Mseg_path));
6840 
6841  bool segment_loss_non_zero=false;
6842  for (int32_t i=0; i<Nloss*Mloss; i++)
6843  {
6844  if (loss[i]>1e-3)
6845  segment_loss_non_zero=true;
6846  }
6847 
6848  SG_FREE(loss);
6849  loss=NULL;
6850 
6851  h->set_orf_info(SGMatrix<int32_t>(orf_info, num_states, 2));
6852  h->set_sparse_features(features_sparse1, features_sparse2);
6853  h->set_plif_matrices(pm);
6854 
6855  if (segment_loss_non_zero)
6856  {
6857  SG_DEBUG("Using version with segment_loss\n")
6858  if (nbest==1)
6859  h->compute_nbest_paths(feat_dims[2], use_orf, 1,true,false);
6860  else
6861  h->compute_nbest_paths(feat_dims[2], use_orf, 2,true,false);
6862  }
6863  else
6864  {
6865  SG_DEBUG("Using version without segment_loss\n")
6866  if (nbest==1)
6867