This page lists ready-to-run Shogun examples for the Static Matlab(tm) and Octave interface.
To run an example, issue
octave name_of_example.m
or start up matlab or octave and then type
name_of_example
Note that you have to make sure that sg.oct or sg.mexglx (the name varies with the architecture) is in the Matlab/Octave path. This can be achieved using the addpath command:
addpath /path/to/octave
or, respectively,
addpath /path/to/matlab
Finally, note that for non-root installations you will have to make sure that libshogun and libshogunui can be found by the dynamic linker, e.g. you will need to set:
LD_LIBRARY_PATH=path/to/libshogun:path/to/libshogunui
before starting matlab.
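For example, a complete session for a non-root installation might look like this (all paths are placeholders for your actual install locations):
LD_LIBRARY_PATH=/path/to/libshogun:/path/to/libshogunui octave
octave> addpath('/path/to/octave');
octave> name_of_example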
% In this example a multi-class support vector machine is trained on a toy data
% set and the trained classifier is used to predict labels of test examples.
% The training algorithm is based on BSVM formulation (L2-soft margin
% and the bias added to the objective function) which is solved by the Improved
% Mitchell-Demyanov-Malozemov algorithm. The training algorithm uses the Gaussian
% kernel of width 2.1 and the regularization constant C=1.2. The bias term of the
% classification rule is not used. The solver stops when the relative duality gap
% falls below 1e-5 and uses a 10MB kernel cache.
%
% For more details on the SVM solver used, see
% V. Franc: Optimization Algorithms for Kernel Methods. Research report.
% CTU-CMP-2005-22. CTU FEL Prague. 2005.
% ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
%
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
max_train_time=60;
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% GMNPSVM
disp('GMNPSVM');
sg('new_classifier', 'GMNPSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% In this example a two-class support vector machine classifier is trained on a
% toy data set and the trained classifier is used to predict labels of test
% examples. As training algorithm Gradient Projection Decomposition Technique
% (GPDT) is used with SVM regularization parameter C=1.2 and a Gaussian
% kernel of width 2.1 and 10MB of kernel cache.
%
% For more details on the GPDT solver see http://dm.unife.it/gpdt
%
%
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% GPBTSVM
disp('GPBTSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'GPBTSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% This example shows usage of a k-nearest neighbor (KNN) classification rule on
% a toy data set. The number of nearest neighbors is set to k=3 and the distances
% are measured by the Euclidean metric. Finally, the KNN rule is applied to predict
% labels of test examples.
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% KNN
disp('KNN');
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'KNN');
sg('train_classifier', 3);
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
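% As a quick cross-check of the KNN rule, the prediction for the first test
% point can be computed by hand (a sketch; squared Euclidean distances give
% the same neighbor ranking as the Euclidean metric, and k=3 as above):
d=sum((fm_train_real-repmat(fm_test_real(:,1),1,size(fm_train_real,2))).^2,1);
[dummy,idx]=sort(d);
pred_first=sign(sum(label_train_twoclass(idx(1:3))))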
% In this example a linear two-class classifier is trained based on the Linear
% Discriminant Analysis (LDA) from toy 2-dimensional examples. The trained
% LDA classifier is used to predict test examples. Note that the LDA classifier
% is optimal under the assumption that both classes are Gaussian distributed with
% equal covariance. For more details on LDA see e.g.
% http://en.wikipedia.org/wiki/Linear_discriminant_analysis
%
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% LDA
disp('LDA');
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'LDA');
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
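% For reference, the textbook two-class LDA direction can be computed directly
% from the data (a sketch; scaling and sign conventions need not match the
% classifier trained above):
X=fm_train_real; y=label_train_twoclass;
mu_neg=mean(X(:,y==-1),2); mu_pos=mean(X(:,y==1),2);
Xc=[X(:,y==-1)-repmat(mu_neg,1,sum(y==-1)), X(:,y==1)-repmat(mu_pos,1,sum(y==1))];
Sw=Xc*Xc';                      % pooled within-class scatter
w_lda=Sw\(mu_pos-mu_neg);       % discriminant direction
b_lda=-0.5*w_lda'*(mu_pos+mu_neg);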
% In this example a two-class linear support vector machine classifier is trained
% on a toy data set and the trained classifier is used to predict labels of
% test examples. As training algorithm the LIBLINEAR solver is used with the SVM
% regularization parameter C=1 and the bias term in the classification rule
% switched off. The solver iterates until it reaches an epsilon-precise
% (epsilon=1e-5) solution or the maximal training time (max_train_time=60
% seconds) is exceeded.
%
% For more details on LIBLINEAR see
% http://www.csie.ntu.edu.tw/~cjlin/liblinear/
C=1;
use_bias=false;
epsilon=1e-5;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% LibLinear
disp('LibLinear');
% type can be one of LIBLINEAR_L2R_LR, LIBLINEAR_L2R_L2LOSS_SVC_DUAL,
% LIBLINEAR_L2R_L2LOSS_SVC, LIBLINEAR_L2R_L1LOSS_SVC_DUAL
sg('new_classifier', 'LIBLINEAR_L2R_LR');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% In this example a two-class linear support vector machine classifier is trained
% on randomly generated data. As training algorithm the LIBLINEAR solver is used
% with the SVM regularization parameter C=0.1 and the bias term in the
% classification rule switched off. The solver iterates until it reaches the
% epsilon-precise (epsilon=1e-3) solution. The example also shows how to compute
% classifier outputs on the training examples and the value of the primal SVM
% objective function.
%
% For more details on LIBLINEAR see
% http://www.csie.ntu.edu.tw/~cjlin/liblinear/
C=0.1;
epsilon=1e-3;
rand('state',17);
num=1000;
dim=20;
dist=1;
traindat=sparse([randn(dim,num/2)-dist, randn(dim,num/2)+dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_epsilon', epsilon);
% type can be one of LIBLINEAR_L2R_LR, LIBLINEAR_L2R_L2LOSS_SVC_DUAL,
% LIBLINEAR_L2R_L2LOSS_SVC, LIBLINEAR_L2R_L1LOSS_SVC_DUAL
sg('new_classifier', 'LIBLINEAR_L2R_L1LOSS_SVC_DUAL');
tic;
sg('train_classifier');
timeliblinear=toc
[b,W]=sg('get_classifier');
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))
b
W'
% primal objective of the L1-loss SVM trained above: 0.5*|w|^2 + C*sum(hinge)
obj=0.5*sum(W.^2)+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% In this example a two-class support vector machine classifier is trained on a
% toy data set and the trained classifier is used to predict labels of test
% examples. As training algorithm LIBSVM is used with the SVM regularization
% parameter C=1, a Gaussian kernel of width 2.1, a 10MB kernel cache and
% the precision parameter epsilon=1e-5.
%
% For more details on the LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1;
use_bias=false;
epsilon=1e-5;
width=2.1;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% LibSVM
disp('LibSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'LIBSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% In this example a multi-class support vector machine classifier is trained on a
% toy data set and the trained classifier is used to predict labels of test
% examples. As training algorithm LIBSVM is used with the SVM regularization
% parameter C=1.2, the bias in the classification rule switched off,
% a Gaussian kernel of width 2.1, a 10MB kernel cache and the precision
% parameter epsilon=1e-5.
%
% For more details on the LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% LibSVM MultiClass
disp('LibSVMMultiClass');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('new_classifier', 'LIBSVM_MULTICLASS');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% In this example a one-class support vector machine classifier is trained on a
% toy data set. The training algorithm finds a hyperplane in the RKHS which
% separates the training data from the origin. The one-class classifier is
% typically used to estimate the support of a high-dimensional distribution.
% For more details see e.g.
% B. Schoelkopf et al. Estimating the support of a high-dimensional
% distribution. Neural Computation, 13, 2001, 1443-1471.
%
% In the example, the one-class SVM is trained by the LIBSVM solver with the
% regularization parameter C=1.2, a Gaussian kernel of width 2.1, the
% precision parameter epsilon=1e-5 and a 10MB kernel cache.
%
% For more details on the LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/.
%
%
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% LibSVM OneClass
disp('LibSVMOneClass');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('new_classifier', 'LIBSVM_ONECLASS');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
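% A common convention (an assumption here, so verify it for your setup) is
% that outputs above zero indicate points inside the estimated support region:
frac_inside=mean(result>0)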
% In this example a two-class linear programming machine (LPM) classifier is
% trained on randomly generated examples. As solver the LPBOOST algorithm is
% used with the regularization parameter C=100 and the bias term in the
% classification rule switched off. Note that LPBOOST calls the CPLEX solver in
% its inner loop, hence CPLEX must be installed. The trained classifier is used
% to compute outputs on the training examples and the primal LPM objective
% function is computed.
%
% For more details on the LPBOOST see
% http://en.wikipedia.org/wiki/LPBoost
C=100;
epsilon=1e-3;
rand('state',17);
num=1000;
dim=20;
dist=1;
traindat=sparse([rand(dim,num/2)-4*dist, rand(dim,num/2)-dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_epsilon', epsilon);
sg('new_classifier', 'LPBOOST');
tic;
sg('train_classifier');
timelpboost=toc
[b,W]=sg('get_classifier');
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))
b
W'
obj=sum(abs(W))+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
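% Since LPBOOST relies on CPLEX, the training call above can be guarded in the
% same way as the SVMLight example further below, so the script degrades
% gracefully when CPLEX is missing (a sketch):
%   try
%     sg('train_classifier');
%   catch
%     disp('No CPLEX support available.');
%   end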
% In this example a two-class linear programming machine (LPM) classifier is
% trained on randomly generated examples. The linear programming problem to
% which the learning problem is transformed is solved by the CPLEX solver which
% must be installed. The LPM regularization parameter is set to C=100 and the bias
% term in the classification rule is switched on. The example also shows how to
% compute classifier outputs on the training examples and the primal LPM objective
% function.
%
% For more details on the LPM see
% Weida Zhou, Li Zhang, Licheng Jiao. Linear programming support vector
% machines. Pattern Recognition, Volume 35, Issue 12, December 2002,
% pages 2927-2936.
%
C=100;
epsilon=1e-3;
rand('state',17);
num=1000;
dim=20;
dist=1;
traindat=sparse([rand(dim,num/2)-4*dist, rand(dim,num/2)-dist]);
trainlab=[-ones(1,num/2), ones(1,num/2) ];
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', true);
sg('new_classifier', 'LPM');
tic;
sg('train_classifier');
timelpm=toc
[b,W]=sg('get_classifier');
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))
b
W'
obj=sum(abs(W))+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% In this example a two-class support vector machine classifier is trained on a
% toy data set and the trained classifier is used to predict labels of test
% examples. As training algorithm the Minimal Primal Dual SVM is used with the
% SVM regularization parameter C=1.2, a Gaussian kernel of width 2.1, a 10MB
% kernel cache and the precision parameter epsilon=1e-5.
%
% For more details on the MPD solver see
% Kienzle, W. and B. Schölkopf: Training Support Vector Machines with Multiple
% Equality Constraints. Machine Learning: ECML 2005, 182-193. (Eds.) Carbonell,
% J. G., J. Siekmann, Springer, Berlin, Germany (11 2005)
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% MPDSVM
disp('MPDSVM');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'MPDSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
% This example shows how to use the Perceptron algorithm for training a
% two-class linear classifier, i.e. y = sign( <x,w>+b). The Perceptron algorithm
% works by iteratively passing through the training examples and applying the
% update rule on those examples which are misclassified by the current
% classifier. The Perceptron update rule reads
%
% w(t+1) = w(t) + alpha * y_t * x_t
% b(t+1) = b(t) + alpha * y_t
%
% where (x_t,y_t) are the feature vector and label (must be +1/-1) of the
% misclassified example, (w(t),b(t)) are the current parameters of the linear
% classifier, (w(t+1),b(t+1)) are the updated parameters, and alpha is the
% learning rate.
%
% The Perceptron algorithm iterates until all training examples are correctly
% classified or the prescribed maximal number of iterations is reached.
%
% The learning rate and the maximal number of iterations can be set by
% sg('set_perceptron_parameters', alpha, max_iter);
%
addpath('tools');
% Perceptron
disp('Perceptron');
% create some separable toy data
num=50;
label_train_twoclass=[-ones(1,num/2) ones(1,num/2)];
fm_train_real=[randn(5,num/2)-1, randn(5,num/2)+1];
fm_test_real=[randn(5,num)-1, randn(5,num)+1];
sg('set_features', 'TRAIN', fm_train_real);
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'PERCEPTRON');
%sg('set_perceptron_parameters', 1.6, 5000);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_real);
result=sg('classify');
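% To make the update rule above concrete, here is a minimal hand-rolled
% perceptron in plain Octave/Matlab (a sketch, independent of the sg
% interface; alpha and max_iter mirror set_perceptron_parameters):
alpha=1.0; max_iter=1000;
w=zeros(size(fm_train_real,1),1); b=0;
for iteration=1:max_iter
  mistakes=0;
  for t=1:size(fm_train_real,2)
    if label_train_twoclass(t)*(w'*fm_train_real(:,t)+b)<=0
      w=w+alpha*label_train_twoclass(t)*fm_train_real(:,t);
      b=b+alpha*label_train_twoclass(t);
      mistakes=mistakes+1;
    end
  end
  if mistakes==0, break; end   % all training examples correctly classified
end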
% In this example a two-class linear support vector machine classifier is trained
% on a toy data set and the trained classifier is used to predict labels of
% test examples. As training algorithm the steepest descent subgradient algorithm
% is used with the SVM regularization parameter C=1.2 and the bias in the classification
% rule switched off. The solver iterates until it finds an epsilon-precise solution
% (epsilon=1e-5) or the maximal training time (max_train_time=60 seconds) is exceeded.
%
% Note that this solver often fails to converge because the steepest-descent
% subgradient algorithm is oversensitive to rounding errors. Note also that this
% is unpublished work which was a predecessor of the OCAS solver (see
% classifier_svmocas).
C=1.2;
use_bias=false;
epsilon=1e-5;
width=2.1;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% SubgradientSVM - often does not converge
disp('SubGradientSVM');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SUBGRADIENTSVM');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);
% sometimes does not terminate
%sg('train_classifier');
%sg('set_features', 'TEST', sparse(fm_test_real));
%result=sg('classify');
% In this example a two-class support vector machine classifier is trained on a
% DNA splice-site detection data set and the trained classifier is used to predict
% labels on the test set. As training algorithm SVM^light is used with the SVM
% regularization parameter C=1.2, the Weighted Degree kernel of degree 20 and
% the precision parameter epsilon=1e-5.
%
% For more details on the SVM^light see
% T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
% Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
%
% For more details on the Weighted Degree kernel see
% G. Raetsch, S. Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
% spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% SVMLight
try
disp('SVMLight');
degree=20;
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('set_labels', 'TRAIN', label_train_dna);
sg('new_classifier', 'SVMLIGHT');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
result=sg('classify');
catch
disp('No support for SVMLight available.')
end
% In this example a two-class support vector machine classifier is trained on a
% DNA splice-site detection data set and the trained classifier is used to
% predict labels on the test set. As training algorithm SVM^light is used with
% the SVM regularization parameter C=1, the Weighted Degree kernel, and the bias
% term in the classification rule switched off.
%
% For more details on the SVM^light see
% T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
% Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
%
% For more details on the Weighted Degree kernel see
% G. Raetsch, S. Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
% spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.
rand('seed',17);
%sequence lengths, number of sequences
len=200;
num_train=500;
num_test=500;
num_a=2;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Weighted Degree kernel parameters
max_order=5;
order=15;
max_mismatch=0;
cache=100;
normalize=true;
mkl_stepsize=1;
block=0;
single_degree=-1;
%generate some toy data
acgt='ACGT';
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
traindat(aa,trainlab==1)='A';
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
testdat(aa,testlab==1)='A';
%traindat'
%input('key to continue')
%train svm
sg('use_linadd', true);
sg('use_batch_computation', false);
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%sg('set_WD_position_weights', ones(1,100)/100) ;
%sg('set_WD_position_weights', ones(1,200)/200) ;
sg('new_classifier', 'SVMLIGHT');
sg('c',C);
tic;sg('train_classifier');toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('set_labels', 'TEST', testlab);
%sg('init_kernel_optimization');
%sg('delete_kernel_optimization');
sg('use_batch_computation', true);
sg('delete_kernel_optimization');
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))
sg('use_batch_computation', true);
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');
tic;out3=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))
max(abs(out1-out2))
max(abs(out1-out3))
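% Both differences should be on the order of numerical precision: linadd and
% batch computation only change how the outputs are computed, not the
% trained classifier itself.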
% In this example a two-class support vector machine classifier is trained on a
% DNA splice-site detection data set and the trained classifier is then used to
% predict labels on the test set. As training algorithm SVM^light is used with
% the SVM regularization parameter C=1, the Weighted Degree kernel, and the bias
% term in the classification rule switched off.
%
% For more details on the SVM^light see
% T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
% Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
%
% For more details on the Weighted Degree kernel see
% G. Raetsch, S. Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
% spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.
rand('seed',17);
%sequence lengths, number of sequences
len=100;
num_train=200;
num_test=300;
num_a=3;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Weighted Degree kernel parameters
max_order=8;
order=20;
shift=10 ;
max_mismatch=0;
cache=100;
single_degree=-1;
x=shift*rand(1,len);
%x(:)=0;
shifts = int32(floor(x(end:-1:1)));
% suboptimal position weights:
posweights = double(floor(x(end:-1:1)));
%generate some toy data
acgt='ACGT';
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
aas=floor((shift+1)*rand(num_test,1));
idx=find(testlab==1);
for i=1:length(idx),
testdat(aa+aas(i),idx(i))='A';
end
%traindat=traindat(1:5,:) ;
%testdat=testdat(1:5,:) ;
%len=5 ;
traindat(end,end)='A' ;
%traindat'
%input('key to continue')
%train svm
sg('use_linadd', true);
sg('use_batch_computation', true);
sg('set_features', 'TRAIN', traindat,'DNA');
sg('set_labels', 'TRAIN', trainlab);
%sg('set_kernel', 'WEIGHTEDDEGREEPOS2', 'CHAR', 10, order, max_mismatch, len, shifts);
sg('set_kernel', 'WEIGHTEDDEGREEPOS3', 'CHAR', 10, order, max_mismatch, len, 1, shifts);
%sg('set_kernel', 'WEIGHTEDDEGREEPOS3', 'CHAR', 10, order, max_mismatch, len, 1, shifts, posweights);
%sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, order);
%sg('set_WD_position_weights', ones(1,100)/100) ;
sg('new_classifier', 'SVMLIGHT');
sg('c',C);
sg('train_classifier');
%w=sg('get_subkernel_weights') ;
%w(1:3)=1 ;
%w(2:3)=0 ;
%w(3)=1 ;
%sg('set_subkernel_weights',w) ;
%z=cell(); z{10}='';
%for i=1:10;
% z{i}=traindat(:,i)';
%end
%sg('set_features', 'TEST', z,'DNA');
sg('set_features', 'TEST', testdat,'DNA');
sg('set_labels', 'TEST', testlab);
sg('use_batch_computation', false);
sg('delete_kernel_optimization');
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))
sg('set_kernel_optimization_type', 'SLOWBUTMEMEFFICIENT');
sg('use_batch_computation', true);
sg('delete_kernel_optimization');
sg('train_classifier')
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))
sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', true);
sg('delete_kernel_optimization');
out3=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))
sg('set_kernel_optimization_type', 'SLOWBUTMEMEFFICIENT');
%sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');
tic;out4=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out4)==testlab))
sg('set_kernel_optimization_type', 'FASTBUTMEMHUNGRY');
sg('use_batch_computation', false);
tic;sg('init_kernel_optimization');toc;
%sg('delete_kernel_optimization');
tic;out5=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out5)==testlab))
max(abs(out1-out2))
max(abs(out1-out3))
max(abs(out1-out4))
max(abs(out1-out5))
%max(abs(out2-out3))
%max(abs(out3-out4))
return
%evaluate svm on train data
sg('set_features', 'TEST', traindat,'DNA');
sg('set_labels', 'TEST', trainlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==trainlab))
%evaluate svm on test data
sg('set_features', 'TEST', testdat,'DNA');
sg('set_labels', 'TEST', testlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==testlab))
% In this example a two-class linear support vector machine classifier is trained
% on a toy data set and the trained classifier is used to predict labels of
% test examples. As training algorithm the SVMLIN solver is used with the SVM
% regularization parameter C=1.2 and the bias term in the classification rule
% switched off. The solver iterates until it finds an epsilon-precise solution
% (epsilon=1e-5) or the maximal training time (max_train_time=60 seconds) is exceeded.
%
% For more details on the SVMLIN solver see
% V. Sindhwani, S.S. Keerthi. Newton Methods for Fast Solution of Semi-supervised
% Linear SVMs. Large Scale Kernel Machines, MIT Press (Book Chapter), 2007.
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% SVMLin
disp('SVMLin');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SVMLIN');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% In this example a two-class linear support vector machine classifier is trained
% on a toy data set and the trained classifier is used to predict labels of
% test examples. As training algorithm the OCAS solver is used with the SVM
% regularization parameter C=1.2 and the bias term in the classification rule
% switched off. The solver iterates until the relative duality gap falls below
% epsilon=1e-5 or the maximal training time (max_train_time=60 seconds) is
% exceeded.
%
% For more details on the OCAS solver see
% V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale Risk
% Minimization. The Journal of Machine Learning Research, vol. 10,
% pp. 2157--2192. October 2009.
%
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% SVMOcas
disp('SVMOcas');
sg('new_classifier', 'SVMOCAS');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% In this example a two-class linear support vector machine classifier is trained
% on randomly generated data. As training algorithm the OCAS solver is used with
% the SVM regularization parameter C=10 and the bias term in the classification
% rule switched off. The solver iterates until the relative duality gap falls
% below epsilon=1e-3. The trained classifier is then used to compute outputs on
% the training examples and the primal SVM objective function is computed.
%
% For more details on the OCAS solver see
% V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale Risk
% Minimization. The Journal of Machine Learning Research, vol. 10,
% pp. 2157--2192. October 2009.
C=10;
epsilon=1e-3;
rand('state',17);
num=16;
dim=10;
dist=0.001;
traindat=[rand(dim,num/2)-dist, rand(dim,num/2)+dist];
scale=(dim*mean(traindat(:)));
traindat=sparse(traindat/scale);
trainlab=[-ones(1,num/2), +ones(1,num/2) ];
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('svm_bufsize', 1000);
sg('svm_epsilon', epsilon);
sg('new_classifier', 'SVMOCAS');
tic;
sg('train_classifier');
timeocas=toc
[b,W]=sg('get_classifier');
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))
sg('new_classifier', 'SVMOCAS');
sg('set_linear_classifier', b, W');
sg('set_features', 'TEST', traindat);
trainout2=sg('classify');
trainerr2=mean(trainlab~=sign(trainout2))
max(abs(trainout-trainout2))
b
W'
% primal SVM objective with the hinge loss: 0.5*|w|^2 + C*sum(max(0,1-y*f(x)))
obj=0.5*sum(W.^2)+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% In this example a two-class linear support vector machine classifier is trained
% on a toy data set and the trained classifier is then used to predict labels of
% test examples. As training algorithm the Stochastic Gradient Descent (SGD)
% solver is used with the SVM regularization parameter C=1.2 and the bias term in the
% classification rule switched off. The solver iterates until the maximal
% training time (max_train_time=60 seconds) is exceeded.
%
% For more details on the SGD solver see
% L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20. MIT
% Press. 2008.
C=1.2;
use_bias=false;
epsilon=1e-5;
max_train_time=60;
addpath('tools');
label_train_twoclass=load_matrix('../data/label_train_twoclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% SVMSGD
disp('SVMSGD');
sg('set_features', 'TRAIN', sparse(fm_train_real));
sg('set_labels', 'TRAIN', label_train_twoclass);
sg('new_classifier', 'SVMSGD');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', use_bias);
sg('svm_max_train_time', max_train_time);
sg('c', C);
sg('train_classifier');
sg('set_features', 'TEST', sparse(fm_test_real));
result=sg('classify');
% In this example a two-class linear support vector machine classifier is trained
% on randomly generated data. As training algorithm the Stochastic Gradient
% Descent (SGD) solver is used with the SVM regularization parameter C=10 and the
% bias term in the classification rule switched off. The example also shows how to
% compute classifier outputs on the test data and the value of the primal SVM
% objective function.
%
% For more details on the SGD solver see
% L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20. MIT
% Press. 2008.
C=10;
rand('state',17);
num=16;
dim=10;
dist=0.001;
traindat=[rand(dim,num/2)-dist, rand(dim,num/2)+dist];
scale=(dim*mean(traindat(:)));
traindat=sparse(traindat/scale);
trainlab=[-ones(1,num/2), +ones(1,num/2) ];
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_use_bias', false);
sg('new_classifier', 'SVMSGD');
tic;
sg('train_classifier');
timesgd=toc
[b,W]=sg('get_classifier');
sg('set_features', 'TEST', traindat);
trainout=sg('classify');
trainerr=mean(trainlab~=sign(trainout))
b
W'
% primal SVM objective with the hinge loss: 0.5*|w|^2 + C*sum(max(0,1-y*f(x)))
obj=0.5*sum(W.^2)+C*sum(max(0,1-trainlab.*(W'*traindat+b)))
% In this example the k-means clustering method is used to cluster a given toy
% data set. In k-means clustering one tries to partition n observations into k
% clusters in which each observation belongs to the cluster with the nearest mean.
% The number of clusters k and a distance (here the Euclidean distance) are given
% as input. After training, one can fetch the result of clustering by obtaining
% the cluster centers and their radii.
addpath('tools');
fm_train=load_matrix('../data/fm_train_real.dat');
% KMEANS
disp('KMeans');
k=3;
iter=1000;
sg('set_features', 'TRAIN', fm_train);
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('new_clustering', 'KMEANS');
sg('train_clustering', k, iter);
[radi, centers]=sg('get_clustering');
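% With the returned centers, each training point can be assigned to its
% nearest cluster (a sketch; it assumes 'get_clustering' returns the centers
% as columns, one per cluster):
n=size(fm_train,2);
D=zeros(k,n);
for c=1:k
  D(c,:)=sum((fm_train-repmat(centers(:,c),1,n)).^2,1);
end
[dummy,assignment]=min(D,[],1);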
% In this example an agglomerative hierarchical single linkage clustering method
% is used to cluster a given toy data set. Starting with each object being
% assigned to its own cluster, clusters are iteratively merged. In each step the
% two clusters are merged whose closest elements have the minimum distance (here
% measured via the Euclidean distance object).
addpath('tools');
fm_train=load_matrix('../data/fm_train_real.dat');
% Hierarchical
disp('Hierarchical');
merges=3;
sg('set_features', 'TRAIN', fm_train);
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('new_clustering', 'HIERARCHICAL');
sg('train_clustering', merges);
[merge_distance, pairs]=sg('get_clustering');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'BRAYCURTIS'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
% these two matrices is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
%
% Obviously, using the Bray Curtis distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% BrayCurtis Distance
disp('BrayCurtisDistance');
sg('set_distance', 'BRAYCURTIS', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
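% A quick shape check (assuming columns of the input matrices are examples):
% the 'TRAIN' matrix above was square (train x train), while this 'TEST'
% matrix has one row per TRAIN point and one column per TEST point.
disp(size(dm));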
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'CANBERRA'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance (dissimilarity ratio) matrix is
% computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance (dissimilarity ratio)
% matrix between these two data sets is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CCanberraMetric.html.
%
% Obviously, using the Canberra distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Canberra Metric
disp('CanberraMetric');
sg('set_distance', 'CANBERRA', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored data sets in 'STRING' representation
% (feature type 'CHAR' with alphabet 'DNA') from different files and
% initializes the distance to 'CANBERRA' with feature type 'WORD'.
%
% Data points in this example are defined by the transformation function
% 'convert' and the preprocessing step applied afterwards (defined by
% 'add_preproc' and preprocessor 'SORTWORDSTRING').
%
% The target 'TRAIN' for 'set_features' controls the binding of the given
% data points. In order to compute a pairwise distance matrix by
% 'get_distance_matrix', we have to perform two preprocessing steps for
% input data 'TRAIN'. The method 'convert' transforms the input data to
% a string representation suitable for the selected distance. The individual
% strings are sorted in ascending order after the execution of 'attach_preproc'.
% A pairwise distance matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the binding of the given
% data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
% matrix between these two data sets by 'get_distance_matrix', we have to
% perform two preprocessing steps for input data 'TEST'. The method 'convert'
% transforms the input data 'TEST' to a string representation suitable for
% the selected distance. The individual strings are sorted in ascending order
% after the execution of 'attach_preproc'. A pairwise distance matrix between
% the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see
% doc/classshogun_1_1CSortWordString.html,
% doc/classshogun_1_1CPreProc.html,
% doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
% doc/classshogun_1_1CCanberraWordDistance.html.
%
% Obviously, using the Canberra word distance is not limited to this showcase
% example.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
order=3;
gap=0;
reverse='n';
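% Parameter roles in the 'convert' calls below (following
% obtain_from_char_features, see the notes above; treat the exact meaning as
% an assumption of this example): order is the word (k-mer) length, order-1
% the start offset, gap the number of allowed gaps, and reverse='n' leaves
% the strings unreversed.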
% CanberraWord Distance
disp('CanberraWordDistance');
sg('set_distance', 'CANBERRA', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'CHEBYSHEW'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix (maximum of absolute feature
% dimension differences) is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix (maximum
% of absolute feature dimension differences) between these two data sets is
% computed.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CChebyshewMetric.html.
%
% Obviously, using the Chebyshew distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Chebyshew Metric
disp('ChebyshewMetric');
sg('set_distance', 'CHEBYSHEW', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'CHISQUARE'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
% these two matrices is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CChiSquareDistance.html.
%
% Obviously, using the ChiSquare distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Chi Square Metric
disp('ChiSquareDistance');
sg('set_distance', 'CHISQUARE', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'COSINE'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
% these two data sets is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CCosineDistance.html.
%
% Obviously, using the Cosine distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Cosine Distance
disp('CosineDistance');
sg('set_distance', 'COSINE', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'EUCLIDIAN'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
% these two data sets is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CEuclidianDistance.html.
%
% Obviously, using the Euclidian distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Euclidian Distance
disp('EuclidianDistance');
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'GEODESIC'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance (shortest path on a sphere) matrix is
% computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance (shortest path on
% a sphere) matrix between these two data sets is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CGeodesicMetric.html.
%
% Obviously, using the Geodesic distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Geodesic Metric
disp('GeodesicMetric');
sg('set_distance', 'GEODESIC', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored data sets in 'STRING' representation
% (feature type 'CHAR' with alphabet 'DNA') from different files and
% initializes the distance to 'HAMMING' with feature type 'WORD'.
%
% Data points in this example are defined by the transformation function
% 'convert' and the preprocessing step applied afterwards (defined by
% 'add_preproc' and preprocessor 'SORTWORDSTRING').
%
% The target 'TRAIN' for 'set_features' controls the binding of the given
% data points. In order to compute a pairwise distance matrix by
% 'get_distance_matrix', we have to perform two preprocessing steps for
% input data 'TRAIN'. The method 'convert' transforms the input data to
% a string representation suitable for the selected distance. The individual
% strings are sorted in ascending order after the execution of 'attach_preproc'.
% A pairwise distance matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the binding of the given
% data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
% matrix between these two data sets by 'get_distance_matrix', we have to
% perform two preprocessing steps for input data 'TEST'. The method 'convert'
% transforms the input data 'TEST' to a string representation suitable for
% the selected distance. The individual strings are sorted in ascending order
% after the execution of 'attach_preproc'. A pairwise distance matrix between
% the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see
% doc/classshogun_1_1CSortWordString.html,
% doc/classshogun_1_1CPreProc.html,
% doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
% doc/classshogun_1_1CHammingWordDistance.html.
%
% Obviously, using the Hamming word distance is not limited to this showcase
% example.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
order=3;
gap=0;
reverse='n'; % 'n' means no reversal; set to 'r' to reverse the strings
% HammingWord Distance
disp('HammingWordDistance');
sg('set_distance', 'HAMMING', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'JENSEN'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance (divergence measure based on the
% Kullback-Leibler divergence) matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance (divergence measure
% based on the Kullback-Leibler divergence) matrix between these two data sets
% is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CJensenMetric.html.
%
% Obviously, using the Jensen-Shannon distance/divergence is not limited to
% this showcase example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Jensen Metric
disp('JensenMetric');
sg('set_distance', 'JENSEN', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% The approach applied below, which shows how to process input data loaded
% from a file, is a useful template for writing your own applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'MANHATTAN'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance (sum of absolute feature
% dimension differences) matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance (sum of absolute
% feature dimension differences) matrix between these two data sets is
% computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix ceases to exist.
%
% For more details see doc/classshogun_1_1CManhattanMetric.html.
%
% Obviously, using the Manhattan distance is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Manhattan Metric
disp('ManhattanMetric');
sg('set_distance', 'MANHATTAN', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
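% As a sanity check, the Manhattan distance is simple enough to recompute
% directly. A minimal sketch, assuming the 'TEST' matrix returned above is
% laid out train-points-by-test-points (compare against dm_ref' otherwise):
dm_ref=zeros(size(fm_train_real,2),size(fm_test_real,2));
for i=1:size(dm_ref,1),
for j=1:size(dm_ref,2),
dm_ref(i,j)=sum(abs(fm_train_real(:,i)-fm_test_real(:,j)));
end
end
disp(max(abs(dm_ref(:)-dm(:))));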
% An approach as applied below, which shows the processing of input data
% from a file, becomes a crucial factor for writing your own sample applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored data sets in 'STRING' representation
% (feature type 'CHAR' with alphabet 'DNA') from different files and
% initializes the distance to 'MANHATTAN' with feature type 'WORD'.
%
% Data points in this example are defined by the transformation function
% 'convert' and the preprocessing step applied afterwards (defined by
% 'add_preproc' and preprocessor 'SORTWORDSTRING').
%
% The target 'TRAIN' for 'set_features' controls the binding of the given
% data points. In order to compute a pairwise distance matrix by
% 'get_distance_matrix', we have to perform two preprocessing steps for
% input data 'TRAIN'. The method 'convert' transforms the input data to
% a string representation suitable for the selected distance. The individual
% strings are sorted in ascending order after the execution of 'attach_preproc'.
% A pairwise distance matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the binding of the given
% data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
% matrix between these two data sets by 'get_distance_matrix', we have to
% perform two preprocessing steps for input data 'TEST'. The method 'convert'
% transforms the input data 'TEST' to a string representation suitable for
% the selected distance. The individual strings are sorted in ascending order
% after the execution of 'attach_preproc'. A pairwise distance matrix between
% the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix is no longer available.
%
% For more details see
% doc/classshogun_1_1CSortWordString.html,
% doc/classshogun_1_1CPreProc.html,
% doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
% doc/classshogun_1_1CManhattanWordDistance.html.
%
% Obviously, using the Manhattan word distance is not limited to this showcase
% example.
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
order=3;
gap=0;
reverse='n';
% ManhattanWord Distance
disp('ManhattanWordDistance');
sg('set_distance', 'MANHATTAN', 'WORD');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
dm=sg('get_distance_matrix', 'TEST');
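% Conceptually, 'convert' maps each overlapping k-mer (here k=order=3) of a
% DNA string to a 16bit integer via base-4 encoding. A hedged illustration of
% this encoding for a single string (shogun's exact internal bit layout and
% k-mer ordering may differ):
map=zeros(1,256);
map('ACGT')=0:3;
s='ACGTACGT';
codes=zeros(1,length(s)-order+1);
for i=1:length(codes),
codes(i)=map(s(i:i+order-1))*(4.^(order-1:-1:0))';
end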
% An approach as applied below, which shows the processing of input data
% from a file, becomes a crucial factor for writing your own sample applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'MINKOWSKI' with
% norm 'k'. Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance matrix is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
% these two data sets is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix is no longer available.
%
% For more details see doc/classshogun_1_1CMinkowskiMetric.html.
%
% Obviously, using the Minkowski metric is not limited to this showcase
% example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Minkowski Metric
disp('MinkowskiMetric');
k=3;
sg('set_distance', 'MINKOWSKI', 'REAL', k);
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
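% A minimal sketch checking a single entry against the textbook Minkowski
% formula d(x,y)=(sum_i |x_i-y_i|^k)^(1/k); dm holds the 'TEST' matrix here,
% which is assumed to be indexed train-point-by-test-point:
d12=sum(abs(fm_train_real(:,1)-fm_test_real(:,2)).^k)^(1/k);
fprintf('direct: %g shogun: %g\n', d12, dm(1,2));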
% An approach as applied below, which shows the processing of input data
% from a file, becomes a crucial factor for writing your own sample applications.
% This approach is just one example of what can be done using the distance
% functions provided by shogun.
%
% First, you need to determine what type your data will be, because this
% will determine the distance function you can use.
%
% This example loads two stored matrices of real values (feature type 'REAL')
% from different files and initializes the distance to 'TANIMOTO'.
% Each column of the matrices corresponds to one data point.
%
% The target 'TRAIN' for 'set_features' controls the processing of the given
% data points, where a pairwise distance (extended Jaccard coefficient)
% matrix is computed by 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
% target 'TRAIN'.
%
% The target 'TEST' for 'set_features' controls the processing of the given
% data points 'TRAIN' and 'TEST', where a pairwise distance (extended
% Jaccard coefficient) matrix between these two data sets is computed by
% 'get_distance_matrix'.
%
% The resulting distance matrix can be reaccessed by 'get_distance_matrix'
% and target 'TEST'. The 'TRAIN' distance matrix is no longer available.
%
% For more details see doc/classshogun_1_1CTanimotoDistance.html.
%
% Obviously, using the Tanimoto distance/coefficient is not limited to
% this showcase example.
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Tanimoto Distance
disp('TanimotoDistance');
sg('set_distance', 'TANIMOTO', 'REAL');
sg('set_features', 'TRAIN', fm_train_real);
dm=sg('get_distance_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
dm=sg('get_distance_matrix', 'TEST');
% In this example the Histogram algorithm object computes a histogram over all
% 16bit unsigned integers in the features.
% Explicit examples on how to use distributions
leng=50;
rep=5;
weight=1;
order=3;
gap=0;
num=12;
len=23;
reverse='n'; % 'n' disables reversal; set 'r' to enable it (a boolean flag would be cleaner)
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
% Histogram
disp('Histogram');
%sg('new_distribution', 'HISTOGRAM');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
% sg('train_distribution');
% histo=sg('get_histogram');
% num_param=sg('get_histogram_num_model_parameters');
% for i = 1:num,
% for j = 1:num_param,
% sg(sprintf('get_log_derivative %d %d', j, i));
% end
% end
% sg('get_log_likelihood');
% sg('get_log_likelihood_sample');
% In this example a hidden Markov model with 3 states and 6 observation
% symbols is trained on a string data set.
leng=50;
rep=5;
weight=1;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
% HMM
disp('HMM');
N=3;
M=6;
% generate a sequence with characters 1-6 drawn from 3 loaded dice
for i = 1:3,
a{i}= [ ones(1,ceil(leng*rand)) 2*ones(1,ceil(leng*rand)) 3*ones(1,ceil(leng*rand)) 4*ones(1,ceil(leng*rand)) 5*ones(1,ceil(leng*rand)) 6*ones(1,ceil(leng*rand)) ];
a{i}= a{i}(randperm(length(a{i})));
end
s=[];
for i = 1:size(a,2),
s= [ s i*ones(1,ceil(rep*rand)) ];
end
s=s(randperm(length(s)));
sequence={''};
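% for each segment s(i), draw a random fraction of the corresponding loaded
% die's symbols and append them to the single observation string: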
for i = 1:length(s),
f(i)=ceil(((1-weight)*rand+weight)*length(a{s(i)}));
t=randperm(length(a{s(i)}));
r=a{s(i)}(t(1:f(i)));
sequence{1}=[sequence{1} char(r+'0')];
end
sg('new_hmm', N, M);
sg('set_features','TRAIN', sequence, 'CUBE');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', 1);
sg('bw');
[p, q, a, b]=sg('get_hmm');
sg('new_hmm', N, M);
sg('set_hmm', p, q, a, b);
likelihood=sg('hmm_likelihood');
% Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due to
% the structure of the Markov chain it is very similar to an HMM with just one
% chain of connected hidden states - that is why we termed this a linear HMM.
order=3;
gap=0;
num=12;
reverse='n'; % 'n' disables reversal; set 'r' to enable it (a boolean flag would be cleaner)
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
% LinearHMM
disp('LinearHMM');
%sg('new_distribution', 'LinearHMM');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
% sg('train_distribution');
% histo=sg('get_histogram');
% num_param=sg('get_histogram_num_model_parameters');
% for i = 1:num,
% for j = 1:num_param,
% sg(sprintf('get_log_derivative %d %d', j, i));
% end
% end
% sg('get_log_likelihood');
% sg('get_log_likelihood_sample');
% Trains two inhomogeneous Markov chains of order 2 on two DNA string data
% sets. The learned models are used to distinguish these DNA data sets from
% each other by the posterior log-odds.
seqlen=100;
numseq=50000;
order=2; % max. 8; the Markov chain is in fact of order order-1
ppseudo=1e-5;
npseudo=10;
motifidx=10:21;
acgt='ACGT';
rand('state', 17);
LT=[-ones(1,numseq), ones(1,numseq)];
XT=acgt(ceil(3*rand(seqlen,2*numseq)));
XT(motifidx,LT==1)='T';
LV=[-ones(1,numseq), ones(1,numseq)];
XV=acgt(ceil(3*rand(seqlen,2*numseq)));
XV(motifidx,LV==1)='T';
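% the 'T' motif implanted at positions motifidx is the only systematic
% difference between the two classes, both in the training set XT and in the
% validation set XV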
sg('set_features', 'TRAIN', XT(:,LT==1), 'DNA') ;
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order);
sg('pseudo', ppseudo);
sg('new_hmm', size(XT,1), 4^order);
sg('linear_train');
[p_p,q_p,a_p,b_p]=sg('get_hmm');
sg('set_features', 'TEST', XV, 'DNA') ;
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order);
posout=sg('one_class_linear_hmm_classify');
sg('set_features', 'TRAIN', XT(:,LT==-1), 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order);
sg('pseudo', npseudo);
sg('new_hmm', size(XT,1), 4^order);
sg('linear_train');
[p_n,q_n,a_n,b_n]=sg('get_hmm');
sg('set_features', 'TEST', XV, 'DNA') ;
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order);
negout=sg('one_class_linear_hmm_classify');
output=posout-negout;
err=mean(sign(output)~=LV)
% Demonstrates the use of polynomial dotfeatures, i.e. features for which the
% feature space of the polynomial kernel is computed on-demand / on-the-fly.
degree = 2;
traindat = [rand(10,50)-1 2+rand(10,50)+1];
testdat = [rand(10,50)-1 2+rand(10,50)+1];
trainlab = [ones(1, 50) -ones(1, 50)];
C=1;
size_cache=10;
epsilon=1e-5;
sg('set_kernel', 'POLY', 'REAL', size_cache, degree);
%sg('set_kernel_normalization', 'IDENTITY');
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('new_classifier', 'SVMLIGHT');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('c', C);
km=sg('get_kernel_matrix', 'TRAIN');
tic; sg('train_classifier'); toc
sg('set_features', 'TEST', testdat);
result=sg('classify');
normalize=1;
sg('loglevel', 'DEBUG');
sg('svm_use_bias', 0);
sg('set_features', 'TRAIN', traindat, 'POLY', degree, normalize);
x = sg('get_features', 'TRAIN');
km2=x'*x;
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', testdat, 'POLY', degree, normalize);
out_wdocas=sg('classify');
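% km (the POLY kernel matrix) and km2 (dot products of the explicit on-the-fly
% features) only agree if both use the same normalization; with the default
% POLY normalization they differ, so enable the commented-out 'IDENTITY'
% normalization above before running this check:
% fprintf('max |km-km2| = %g\n', max(abs(km(:)-km2(:))));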
% Demonstrates the use of weighted degree features, i.e. features for which the
% feature space of the weighted degree kernel is computed on-demand / on-the-fly.
% These features can be particularly fast in linear SVM solvers.
C=1;
order=6;
degree=order;
from_order=6;
max_mismatch=0;
cache=100;
normalize=1;
mkl_stepsize=1;
block=1;
single_degree=-1;
epsilon=1e-5;
rand('seed',17);
%sequence lengths, number of sequences
len=20;
num_train=10;
num_a=5;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
epsilon=1e-6;
%generate some toy data
acgt='ACGT';
shift=1;
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
testdat=traindat;
testlab=trainlab;
%train svm
sg('threads',1);
sg('use_linadd', 1);
sg('use_batch_computation', 1);
sg('progress', 'ON');
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('svm_use_bias', 0);
sg('new_classifier', 'LIGHT');
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache, from_order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
%x=sg('get_subkernel_weights');
%
%sg(sprintf( 'set_kernel WEIGHTEDDEGREE CHAR %i %i %i %i %i %i %i', cache, order, max_mismatch, 0, mkl_stepsize, block, single_degree) );
%sg('set_subkernel_weights',x(1:order));
%
%%kmu=sg('get_kernel_matrix', 'TRAIN');
%
%sg(sprintf( 'set_kernel WEIGHTEDDEGREE CHAR %i %i %i %i %i %i %i', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree) );
%sg('set_subkernel_weights',x(1:order));
%%km=sg('get_kernel_matrix', 'TRAIN');
%sg('new_classifier LIGHT');
sg('c',C);
tic;
sg('svm_train');
tim_lo=toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
out_ref=sg('svm_classify');
%prc_ref=calcrfcscore(out_ref, testlab);
%roc_ref=calcrocscore(out_ref, testlab);
traindat(traindat=='A')=0;
traindat(traindat=='C')=1;
traindat(traindat=='G')=2;
traindat(traindat=='T')=3;
traindat=uint8(traindat);
testdat(testdat=='A')=0;
testdat(testdat=='C')=1;
testdat(testdat=='G')=2;
testdat(testdat=='T')=3;
testdat=uint8(testdat);
sg('set_features', 'TRAIN', traindat', 'RAWDNA');
sg('set_labels', 'TRAIN', trainlab);
sg('c',C);
sg('svm_epsilon', epsilon);
sg('new_classifier','WDSVMOCAS',order, from_order);
tic;
sg('svm_train');
tim_lo=toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'RAWDNA');
out=sg('svm_classify');
%prc=calcrfcscore(out, testlab);
%roc=calcrocscore(out, testlab);
sg('set_features', 'TRAIN', traindat, 'RAWDNA', 'WD', order, from_order);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', testdat, 'RAWDNA', 'WD', order, from_order);
out_wdocas=sg('classify');
max(abs(out-out_ref))
max(abs(out_wdocas-out_ref))
max(abs(out_wdocas-out))
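% The block below builds the explicit (sparse) weighted-degree feature map by
% hand: for every position i and every k-mer length d, the k-mer starting at
% position i is base-4 encoded into 'val' and the degree weight weights(d) is
% written at the corresponding dimension. This is exactly the feature space
% that the WD features above evaluate on-the-fly.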
dat=[];
weights=sqrt((degree:-1:1)/sum(degree:-1:1))/4.281744;
N = size(traindat,1);
nDim = 0;
for d = 1:degree,
nDim = nDim + 4^d;
end
nDim = nDim*N;
for j=1:size(traindat,2),
dat(:,j)= zeros(nDim,1);
offset = 0;
for i=1:N,
val = 0;
for d = 1:degree
if i+d-1<=N,
val = 4*val + double(traindat(i+d-1,j));
dat(offset+val+1,j) = weights(d);
offset = offset + 4^d;
end
end
end
end
traindat=sparse(dat);
testdat=traindat;
sg('set_features', 'TRAIN', traindat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', traindat);
out_ocas=sg('classify');
sg('set_features', 'TRAIN', dat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', dat);
out_docas=sg('classify');
max(abs(out-out_ocas))
max(abs(out-out_ref))
max(abs(out_ocas-out_ref))
max(abs(out_ocas-out_docas))
sg('set_features', 'TRAIN', [traindat;2*traindat]);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('set_features', 'TEST', [traindat;2*traindat]);
out1=sg('classify');
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('add_dotfeatures', 'TRAIN', traindat);
sg('add_dotfeatures', 'TRAIN', 2*dat);
sg('set_labels', 'TRAIN', trainlab);
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
sg('add_dotfeatures', 'TEST', traindat);
sg('add_dotfeatures', 'TEST', 2*dat);
out2=sg('classify');
max(abs(out1-out2))
% Demonstrates the use of weighted spectrum features, i.e. features for which the
% feature space of the weighted spectrum kernel is computed on-demand / on-the-fly.
% These features can be particularly fast in linear SVM solvers.
rand('seed',17);
%sequence lengths, number of sequences
len=100;
num_train=10;
num_a=5;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
epsilon=1e-8;
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=8;
cache=10;
use_sign=false;
normalize=true;
if normalize,
normalization='FULL'; %NO,SQRT,LEN,SQLEN,FULL
else
normalization='NO'; %NO,SQRT,LEN,SQLEN,FULL
end
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
sg('loglevel', 'ALL');
%%% spec
weights=(order:-1:1);
weights=weights/sum(weights);
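% By definition, the weighted spectrum kernel is the weights(o)-weighted sum
% of plain spectrum (COMMSTRING) kernels of orders o=1..order; the loop below
% assembles this reference matrix explicitly for comparison with the
% WEIGHTEDCOMMSTRING kernel computed further down.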
km=zeros(size(traindat,2));
for o=1:order,
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', o, order-1);
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'COMMSTRING', 'WORD', cache, use_sign, 'NO');
km=km+weights(o)*sg('get_kernel_matrix', 'TRAIN');
end
km2=km;
if normalize,
for i=1:size(km,1),
for j=1:size(km,2),
km2(i,j)=km(i,j)/(sqrt(km(i,i)*km(j,j)));
end
end
end
%%% wdspec
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache, use_sign, normalization);
feat=sg('get_features','TRAIN');
wkm=sg('get_kernel_matrix', 'TRAIN');
fprintf('max diff %g\n', max(abs(wkm(:)-km2(:))))
sg('c', C);
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('use_linadd', true);
sg('new_classifier', 'SVMLIGHT');
sg('set_labels','TRAIN', trainlab);
sg('train_classifier');
[bias, alphas]=sg('get_classifier');
sg('init_kernel_optimization');
svmw=sg('get_kernel_optimization');
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TEST');
out_ref=sg('classify');
sg('c', C);
sg('clean_features', 'TRAIN');
sg('clean_features', 'TEST');
sg('svm_epsilon', epsilon);
sg('svm_use_bias', 0);
sg('use_linadd', false);
sg('new_classifier', 'SVMLIGHT');
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('set_labels','TRAIN', trainlab);
sg('set_kernel','CUSTOM', km2, 'FULL');
sg('train_classifier');
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
out_ref2=sg('classify');
traindat(traindat=='A')=0;
traindat(traindat=='C')=1;
traindat(traindat=='G')=2;
traindat(traindat=='T')=3;
traindat=uint8(traindat);
testdat=uint8(traindat);
clear sg
sg('svm_use_bias', 0);
sg('svm_epsilon', epsilon);
sg('set_labels','TRAIN', trainlab);
sg('set_features', 'TRAIN', traindat, 'RAWDNA','WSPEC', order, order-1, normalize);
sg('new_classifier', 'SVMOCAS');
sg('train_classifier');
[bias_ocas, alphas_ocas]=sg('get_classifier');
sg('set_features', 'TEST', testdat, 'RAWDNA','WSPEC', order, order-1, normalize);
out=sg('classify');
fprintf('max out diff %g\n', max(abs(out-out_ref)))
fprintf('max out diff %g\n', max(abs(out-out_ref2)))
max(abs(svmw(1:length(alphas_ocas))-alphas_ocas'))
%o=[];
%for i=1:length(feat),
% o(i)=alphas_ocas*feat{i};
%end
% Demonstrates on some toy data how, by sliding a window over a single string,
% a whole string data set can be constructed in a very memory-efficient manner.
% Instead of sliding the window over the string in fixed steps one can also
% extract windows by supplying a position list.
acgt='ACGT';
dat={acgt([1*ones(1,10) 2*ones(1,10) 3*ones(1,10) 4*ones(1,10) 1])};
sg('set_features', 'TRAIN', dat, 'DNA', 'slide_window', 5, 1);
f=sg('get_features', 'TRAIN')
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30,36]));
f=sg('get_features', 'TRAIN')
sg('set_features', 'TEST', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30,36]));
ft=sg('get_features', 'TEST')
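% The single string above has 41 characters, so sliding a length-5 window
% with step 1 should yield 37 windows, while the position list variant
% extracts one length-5 window per given (0-based) start offset.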
C=1;
order=20;
order_com=5;
max_mismatch=0;
len=200;
shift=0;
num=100;
num_test=5000;
cache=10;
normalize=true;
mkl_stepsize=1;
block=0;
single_degree=-1;
sg('set_kernel', 'WEIGHTEDDEGREE', 'STRING', cache, order, max_mismatch, normalize, mkl_stepsize, block, single_degree);
km=sg('get_kernel_matrix', 'TRAIN')
sg('clean_features', 'TRAIN');
sg('clean_features', 'TEST');
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25,30]+5));
sg('set_features', 'TRAIN', dat, 'DNA', 'from_position_list',5, int32([0,1,2,5,15,25]+9));
sg('clean_features', 'TRAIN');
% This is an example for the initialization of the chi2-kernel on real data, where
% each column of the matrices corresponds to one training/test example.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
width=1.4;
% CHI2
disp('Chi2');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of a combined kernel, which is a weighted sum of
% in this case three kernels on real valued data. The sub-kernel weights are all set to 1.
%
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Combined
disp('Combined');
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('set_kernel', 'COMBINED', size_cache);
sg('add_kernel', 1, 'LINEAR', 'REAL', size_cache);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', size_cache, 1);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'POLY', 'REAL', size_cache, 3, false);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TRAIN');
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of the CommUlongString-kernel. This kernel
% sums over k-mer matches (k='order'). For efficient computation a preprocessor is used
% that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
% only once.
size_cache=10;
order=30;
gap=0;
reverse='n';
use_sign=0;
normalization='FULL';
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Comm Ulong String
disp('CommUlongString');
sg('add_preproc', 'SORTULONGSTRING');
sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of the CommWordString-kernel (aka
% Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel
% sums over k-mer matches (k='order'). For efficient computation a preprocessor is used
% that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
% only once.
size_cache=10;
order=7;
gap=0;
reverse='n';
use_sign=0;
normalization='FULL';
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Comm Word String
disp('CommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% The constant kernel gives a trivial kernel matrix with all entries set to the same value
% defined by the argument 'c'.
%
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Const
disp('Const');
c=23;
sg('set_kernel', 'CONST', 'REAL', size_cache, c);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% A user-defined custom kernel is assigned in this example; either only the upper triangle may be given (DIAG),
% the full matrix (FULL), or the full matrix which is then internally stored as an upper
% triangle (FULL2DIAG). Labels for the examples are given, an SVM is trained and the SVM is used to classify the examples.
%
truth = sign(2*rand(1,60) - 1);
km=rand(length(truth));
km=km+km';
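% adding the transpose symmetrizes the random matrix, making it a valid
% kernel matrix for the 'FULL' target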
sg('set_kernel', 'CUSTOM', km, 'FULL');
sg('set_labels', 'TRAIN', truth);
sg('new_classifier', 'LIBSVM');
sg('train_classifier');
out_all = sg('classify');
out = sg('classify_example',0);
% This is an example for the initialization of the diag-kernel.
% The diag kernel produces a matrix in which all entries off the main
% diagonal are zero and the diagonal entries are set to the constant 'diag'.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Diag
disp('Diag');
diag=23.;
sg('set_kernel', 'DIAG', 'REAL', size_cache, diag);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% With the distance kernel one can use any of the following distance metrics:
% MINKOWSKI MANHATTAN HAMMING CANBERRA CHEBYSHEW GEODESIC JENSEN CHISQUARE TANIMOTO COSINE BRAYCURTIS EUCLIDIAN
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Distance
disp('Distance');
width=1.7;
sg('set_distance', 'EUCLIDIAN', 'REAL');
sg('set_kernel', 'DISTANCE', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% The FixedDegree String kernel takes as input two strings of the same size and counts the number of matches of length d.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Fixed Degree String
disp('FixedDegreeString');
degree=3;
sg('set_kernel', 'FIXEDDEGREE', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.
size_cache=10;
width=2.1;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Gaussian
disp('Gaussian');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% An experimental kernel inspired by the WeightedDegreePositionStringKernel and the Gaussian kernel.
% The idea is to shift the dimensions of the input vectors against each other. 'shift_step' is the
% step size of the shifts and 'max_shift' is the maximal shift.
size_cache=10;
width=1.0;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% GaussianShift
disp('GaussianShift');
max_shift=2;
shift_step=1;
sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', size_cache, width, max_shift, shift_step);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% The HistogramWordString kernel computes the TOP kernel on inhomogeneous Markov chains.
size_cache=10;
order=3;
gap=0;
reverse='n';
addpath('tools');
label_train_dna=load_matrix('../data/label_train_dna.dat');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Plugin Estimate
disp('PluginEstimate w/ HistogramWord');
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
pseudo_pos=1e-1;
pseudo_neg=1e-1;
sg('new_plugin_estimator', pseudo_pos, pseudo_neg);
sg('set_labels', 'TRAIN', label_train_dna);
sg('train_estimator');
sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache);
km=sg('get_kernel_matrix', 'TRAIN');
% not supported yet;
% lab=sg('plugin_estimate_classify');
km=sg('get_kernel_matrix', 'TEST');
% The LocalityImprovedString kernel is inspired by the polynomial kernel.
% Comparing neighboring characters it puts emphasis on local features.
%
% It can be defined as
% K({\bf x},{\bf x'})=\left(\sum_{i=0}^{T-1}\left(\sum_{j=-l}^{+l}w_jI_{i+j}({\bf x},{\bf x'})\right)^{d_1}\right)^{d_2},
% where
% I_i({\bf x},{\bf x'})=1 if x_i=x'_i and 0 otherwise.
%
%
rand('seed',17);
%sequence lengths, number of sequences
len=200;
num_train=500;
num_test=500;
num_a=2;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%locality improved kernel parameters
cache=100;
l=3;
d1=4;
d2=1;
%generate some toy data
acgt='ACGT';
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
traindat(aa,trainlab==1)='A';
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
testdat(aa,testlab==1)='A';
%traindat'
%input('key to continue')
%train svm
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('set_kernel', 'SLIK', 'CHAR', cache, l, d1, d2);
sg('new_classifier', 'LIBSVM');
sg('c', C);
tic;sg('train_classifier');toc;
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('set_labels', 'TEST', testlab);
out1=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out1)==testlab))
out2=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out2)==testlab))
tic;out3=sg('classify');toc;
fprintf('accuracy: %f \n', mean(sign(out3)==testlab))
max(abs(out1-out2))
max(abs(out1-out3))
% This is an example for the initialization of a linear kernel on real valued
% data using scaling factor 1.2.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Linear
disp('Linear');
scale=1.2;
sg('set_kernel', 'LINEAR', 'REAL', size_cache, scale);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
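% a minimal sanity check, assuming the scale enters as k(x,y)=scale*(x'*y);
% if shogun applies the factor differently, the ratio km_ref./km will expose
% the convention:
km_ref=scale*(fm_train_real'*fm_train_real);
fprintf('max |km-km_ref| = %g\n', max(abs(km(:)-km_ref(:))));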
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of a linear kernel on raw byte
% data.
size_cache=10;
addpath('tools');
fm_train_byte=uint8(load_matrix('../data/fm_train_byte.dat'));
fm_test_byte=uint8(load_matrix('../data/fm_test_byte.dat'));
% LinearByte is currently broken
disp('LinearByte');
sg('set_kernel', 'LINEAR', 'BYTE', size_cache);
sg('set_features', 'TRAIN', fm_train_byte, 'RAWBYTE');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_byte, 'RAWBYTE');
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of a linear kernel on string data. The
% strings are all of the same length and consist of the characters 'ACGT' corresponding
% to the DNA-alphabet. Each column of the matrices of type char corresponds to
% one training/test example.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Linear String
disp('LinearString');
sg('set_kernel', 'LINEAR', 'CHAR', size_cache);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of a linear kernel on word (2byte)
% data.
size_cache=10;
addpath('tools');
fm_train_word=uint16(load_matrix('../data/fm_train_word.dat'));
fm_test_word=uint16(load_matrix('../data/fm_test_word.dat'));
% LinearWord
disp('LinearWord');
scale=1.4;
sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale);
sg('set_features', 'TRAIN', fm_train_word);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_word);
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of the local alignment kernel on
% DNA sequences, where each column of the matrices of type char corresponds to
% one training/test example.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Local Alignment String
disp('LocalAlignmentString');
sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% This example initializes the locality improved string kernel. The locality improved string
% kernel is defined on sequences of the same length and inspects letters matching at
% corresponding positions in both sequences. The kernel sums over all matches in windows of
% length l and takes this sum to the power of 'inner_degree'. The sum over all these
% terms along the sequence is taken to the power of 'outer_degree'.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Locality Improved String
disp('LocalityImprovedString');
length=5; % note: shadows the builtin length() for the rest of the session
inner_degree=5;
outer_degree=inner_degree+2;
sg('set_kernel', 'LIK', 'CHAR', size_cache, length, inner_degree, outer_degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% This is an example initializing the oligo string kernel which takes distances
% between matching oligos (k-mers) into account via a Gaussian. Variable 'k' defines the length
% of the oligo and variable 'w' the width of the Gaussian. The oligo string kernel is
% implemented for the DNA-alphabet 'ACGT'.
%
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Oligo String
k=3;
w=1.2;
sg('set_kernel', 'OLIGO', 'CHAR', size_cache, k, w);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% This example initializes the polynomial kernel with real data.
% If variable 'inhomogene' is 'true', +1 is added to the scalar product
% before taking it to the power of 'degree'. If 'use_normalization' is
% set to 'true', the kernel matrix will be normalized by the square roots
% of the diagonal entries.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Poly
disp('Poly');
degree=4;
inhomogene=false;
use_normalization=true;
sg('set_kernel', 'POLY', 'REAL', size_cache, degree, inhomogene, use_normalization);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% This is an example for the initialization of the PolyMatchString kernel on string data.
% The PolyMatchString kernel sums over the matches of two strings of the same length and
% takes the sum to the power of 'degree'. The strings consist of the characters 'ACGT' corresponding
% to the DNA-alphabet. Each column of the matrices of type char corresponds to
% one training/test example.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Poly Match String
disp('PolyMatchString');
degree=3;
inhomogene=false;
sg('set_kernel', 'POLYMATCH', 'CHAR', size_cache, degree, inhomogene);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% The PolyMatchWordString kernel is defined on strings of equal length.
% The kernel sums over the matches of two strings of the same length and
% takes the sum to the power of 'degree'. The strings in this example
% consist of the characters 'ACGT' corresponding to the DNA-alphabet. Each
% column of the matrices of type char corresponds to one training/test example.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
order=3;
gap=0;
reverse='n'; % 'n' disables reversal; set 'r' to enable it (a boolean flag would be cleaner)
use_sign=false;
normalization='FULL';
% Poly Match WordString
disp('PolyMatchWordString');
degree=2;
inhomogene=true;
sg('set_kernel', 'POLYMATCH', 'WORD', size_cache, degree, inhomogene);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
km=sg('get_kernel_matrix', 'TEST');
% The standard Sigmoid kernel computed on dense real valued features.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% sigmoid
disp('Sigmoid');
gamma=1.2;
coef0=1.3;
sg('set_kernel', 'SIGMOID', 'REAL', size_cache, gamma, coef0);
sg('set_features', 'TRAIN', fm_train_real);
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
km=sg('get_kernel_matrix', 'TEST');
% The SimpleLocalityImprovedString kernel is a ``simplified'' and better-performing version of the locality improved kernel.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Simple Locality Improved String
disp('SimpleLocalityImprovedString');
length=5; % note: shadows the builtin length() for the rest of the session
inner_degree=5;
outer_degree=inner_degree+2;
sg('set_kernel', 'SLIK', 'CHAR', size_cache, length, inner_degree, outer_degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% The well known Gaussian kernel (swiss army knife for SVMs) on sparse real valued features.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Gaussian
disp('SparseGaussian');
width=1.3;
sg('set_kernel', 'GAUSSIAN', 'SPARSEREAL', size_cache, width);
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% Computes the standard linear kernel on sparse real valued features.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Linear
disp('SparseLinear');
scale=1.3;
sg('set_kernel', 'LINEAR', 'SPARSEREAL', size_cache, scale);
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% Computes the standard polynomial kernel on sparse real valued features.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% Sparse Poly
disp('SparsePoly');
degree=3;
inhomogene=true;
use_normalization=false;
sg('set_kernel', 'POLY', 'SPARSEREAL', size_cache, degree, inhomogene, use_normalization);
sg('set_features', 'TRAIN', sparse(fm_train_real));
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', sparse(fm_test_real));
km=sg('get_kernel_matrix', 'TEST');
% The CommUlongString kernel may be used to compute the spectrum kernel from strings that have been mapped into unsigned 64bit integers.
% These 64bit integers correspond to k-mers. To be applicable in this kernel they need to be sorted (e.g. via the SortUlongString pre-processor).
% It basically uses the algorithm in the unix "comm" command (hence the name) to compute the kernel function.
% In this feature vector each entry denotes how often the k-mer appears in that string. Note that this representation enables spectrum kernels of
% order 8 for 8bit alphabets (like binaries) and order 32 for 2-bit alphabets like DNA. For this kernel the linadd speedups are implemented
% (though there is room for improvement here when a whole set of sequences is ADDed) using sorted lists.
rand('seed',17);
%sequence lengths, number of sequences
len=100;
num_train=1000;
num_test=5000;
num_a=5;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=5;
cache=10;
use_sign=true;
normalization='FULL'; %NO,SQRT,LEN,SQLEN,FULL
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
testdat=acgt(ceil(4*rand(len,num_test)));
testlab=[-ones(1,num_test/2),ones(1,num_test/2)];
aas=floor((shift+1)*rand(num_test,1));
idx=find(testlab==1);
for i=1:length(idx),
testdat(aa+aas(i),idx(i))='A';
end
%traindat'
%input('key to continue')
%train svm
sg('use_linadd', true);
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('set_labels', 'TRAIN', trainlab);
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'COMMSTRING', 'WORD', cache, use_sign, normalization);
sg('new_classifier', 'SVMLIGHT');
sg('c', C);
sg('train_classifier');
sg('init_kernel_optimization');
%evaluate svm on train data
sg('set_features', 'TEST', traindat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('attach_preproc', 'TEST');
sg('set_labels', 'TEST', trainlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==trainlab))
%evaluate svm on test data
sg('set_features', 'TEST', testdat, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1);
sg('attach_preproc', 'TEST');
sg('set_labels', 'TEST', testlab);
out=sg('classify');
fprintf('accuracy: %f \n', mean(sign(out)==testlab))
% The WeightedCommWordString kernel may be used to compute the weighted
% spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
% length is weighted by some coefficient $\beta_k$) from strings that have
% been mapped into unsigned 16bit integers.
%
% These 16bit integers correspond to k-mers. To be applicable in this kernel they
% need to be sorted (e.g. via the SortWordString pre-processor).
%
% It basically uses the algorithm in the unix "comm" command (hence the name)
% to compute:
%
% k({\bf x},{\bf x'}) = \sum_{k=1}^K \beta_k \Phi_k({\bf x}) \cdot \Phi_k({\bf x'})
%
% where $\Phi_k$ maps a sequence ${\bf x}$ that consists of letters in
% $\Sigma$ to a feature vector of size $|\Sigma|^k$. In this feature
% vector each entry denotes how often the k-mer appears in that ${\bf x}$.
%
% Note that this representation is especially tuned to small alphabets
% (like the 2-bit alphabet DNA), for which it enables spectrum kernels
% of order 8.
%
% For this kernel the linadd speedups are quite efficiently implemented using
% direct maps.
%
rand('seed',17);
%sequence lengths, number of sequences
len=100;
num_train=10;
num_a=5;
aa=(round(len/2-num_a/2)):(round(len/2+num_a/2-1));
%SVM regularization factor C
C=1;
%Spectrum kernel parameters
order=8;
cache=10;
use_sign=false;
normalization='NO'; %NO,SQRT,LEN,SQLEN,FULL
%generate some toy data
acgt='ACGT';
shift=40;
rand('state',1);
traindat=acgt(ceil(4*rand(len,num_train)));
trainlab=[-ones(1,num_train/2),ones(1,num_train/2)];
aas=floor((shift+1)*rand(num_train,1));
idx=find(trainlab==1);
for i=1:length(idx),
traindat(aa+aas(i),idx(i))='A';
end
%%% spec
weights=(order:-1:1);
weights=weights/sum(weights);
km=zeros(size(traindat,2));
for o=1:order,
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', o, order-1);
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'COMMSTRING', 'WORD',cache, use_sign, normalization);
km=km+weights(o)*sg('get_kernel_matrix', 'TRAIN');
end
%%% wdspec
sg('set_features', 'TRAIN', traindat, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, 0, 'r');
sg('add_preproc', 'SORTWORDSTRING');
sg('attach_preproc', 'TRAIN');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache, use_sign, normalization);
wkm=sg('get_kernel_matrix', 'TRAIN');
max(abs(wkm(:)-km(:)))
% The WeightedCommWordString kernel may be used to compute the weighted
% spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
% length is weighted by some coefficient \f$\beta_k\f$) from strings that have
% been mapped into unsigned 16bit integers.
%
% These 16bit integers correspond to k-mers. To be applicable in this kernel they
% need to be sorted (e.g. via the SortWordString pre-processor).
%
% It basically uses the algorithm in the unix "comm" command (hence the name)
% to compute:
%
% k({\bf x},{\bf x'}) = \sum_{k=1}^K \beta_k \Phi_k({\bf x}) \cdot \Phi_k({\bf x'})
%
% where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
% \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
% vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
%
% Note that this representation is especially tuned to small alphabets
% (like the 2-bit alphabet DNA), for which it enables spectrum kernels
% of order 8.
%
% For this kernel the linadd speedups are quite efficiently implemented using
% direct maps.
%
size_cache=10;
use_sign=0;
reverse='r';
order=8;
gap=0;
normalization='FULL';
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Comm Word String
disp('WeightedCommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', size_cache, use_sign, normalization);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
%
% The WD-shift kernel of order d compares two sequences X and
% Y of length L by summing all contributions of k-mer matches of
% lengths k in 1...d, weighted by coefficients beta_k
% allowing for a positional tolerance of up to shift s.
%
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Degree Position String
disp('WeightedDegreePositionString');
degree=20;
sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
% The Weighted Degree String kernel.
%
% The WD kernel of order d compares two sequences X and
% Y of length L by summing all contributions of k-mer matches of
% lengths k in 1...d , weighted by coefficients beta_k. It
% is defined as
%
% k(X, Y)=\sum_{k=1}^d\beta_k\sum_{l=1}^{L-k+1}I(u_{k,l}(X)=u_{k,l}(Y)).
%
% Here, $u_{k,l}(X)$ is the string of length k starting at position
% l of the sequence X and I(.) is the indicator function
% which evaluates to 1 when its argument is true and to 0
% otherwise.
%
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
% Weighted Degree String
disp('WeightedDegreeString');
degree=20;
sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
km=sg('get_kernel_matrix', 'TEST');
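% Solve a small toy quadratic program with the built-in pr_loqo solver. As a
% sketch of the assumed problem form: minimize c'*x + 0.5*x'*H*x subject to
% A*x = b and l <= x <= u. Note that A is first created as a 10x10 zero
% matrix and then immediately overwritten by a 1x10 row of ones, i.e. the
% single equality constraint sum(x) = 1.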
c=ones(10,1);
H=eye(10);
A=zeros(10);
A=ones(1,10);
b=1;
l=zeros(10,1);
u=ones(10,1);
%tic;
%[x,y] = pr_loqo2(c, H, A, b, l, u);
%toc
tic;
[x2,y2] = sg('pr_loqo',c', H, A, b, l', u');
toc
% This script should enable you to rerun the experiment in the
% paper that we labeled "christmas star".
%
% The task is to classify two star-shaped classes that share the
% midpoint. The difficulty of the learning problem depends on the
% distance between the classes, which is varied.
%
% Our model selection leads to a choice of C = 0.5. The model
% selection is not repeated inside this script.
% Preliminary settings:
C = 0.5; % SVM Parameter
cache_size = 50; % cache per kernel in MB
svm_eps=1e-3; % svm epsilon
mkl_eps=1e-3; % mkl epsilon
no_obs = 50; % number of observations / data points (sum for train and test and both classes)
% 2000 was used in the paper
k_star = 20; % number of "leaves" of the stars
alpha = 0.3; % noise level of the data
radius_star(:,1) = [4.1:0.2:10]'; % increasing radius of the first class
radius_star(:,2) = 4*ones(length(radius_star(:,1)),1); % fixed radius of the second class
% distance between the classes: diff(radius_star(:,1)-radius_star(:,2))
rbf_width = [0.01 0.1 1 10 100]; % different width for the five used rbf kernels
mkl_norm = 1; % >=1
ent_lambda = 0; % 0<=lambda<=1
rand('state', 17);
randn('state', 17);
%%%%
%%%% Great loop: train MKL for every data set (the different distances between the stars)
%%%%
%sg('loglevel', 'ALL');
%sg('echo', 'ON');
for kk = 1:size(radius_star,1)
% data generation
fprintf('MKL for radius %+02.2f \n', radius_star(kk,1))
dummy(1,:) = rand(1,4*no_obs);
noise = alpha*randn(1,4*no_obs);
dummy(2,:) = sin(k_star*pi*dummy(1,:)) + noise; % sine
dummy(2,1:2*no_obs) = dummy(2,1:2*no_obs)+ radius_star(kk,1); % distance shift: first class
dummy(2,(2*no_obs+1):end) = dummy(2,(2*no_obs+1):end)+ radius_star(kk,2); % distance shift: second class
dummy(1,: ) = 2*pi*dummy(1,:);
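% interpret dummy(1,:) as the angle and dummy(2,:) as the (noisy, shifted)
% radius, and convert from polar to Cartesian coordinates: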
x(1,:) = dummy(2,:).*sin(dummy(1,:));
x(2,:) = dummy(2,:).*cos(dummy(1,:));
train_y = [-ones(1,no_obs) ones(1,no_obs)];
test_y = [-ones(1,no_obs) ones(1,no_obs)];
train_x = x(:,1:2:end);
test_x = x(:,2:2:end);
clear dummy x;
% train MKL
sg('clean_kernel');
sg('clean_features', 'TRAIN');
sg('add_features','TRAIN', train_x); % set a training set for every SVM
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('set_labels','TRAIN', train_y); % set the labels
sg('new_classifier', 'MKL_CLASSIFICATION');
sg('mkl_use_interleaved_optimization', 1); % 0, 1
sg('set_solver', 'ELASTICNET'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
%sg('set_constraint_generator', 'LIBSVM');
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('elasticnet_lambda',ent_lambda);
sg('svm_epsilon', svm_eps);
sg('set_kernel', 'COMBINED', 0);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(1));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(2));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(3));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(4));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(5));
sg('c', C);
sg('train_classifier');
[b,alphas]=sg('get_svm') ;
w(kk,:) = sg('get_subkernel_weights');
% calculate train error
sg('clean_features', 'TEST');
sg('add_features','TEST',train_x);
sg('add_features','TEST',train_x);
sg('add_features','TEST',train_x);
sg('add_features','TEST',train_x);
sg('add_features','TEST',train_x);
sg('set_labels','TEST', train_y);
sg('set_threshold', 0);
result.trainout(kk,:)=sg('classify');
result.trainerr(kk) = mean(train_y~=sign(result.trainout(kk,:)),2);
% calculate test error
sg('clean_features', 'TEST');
sg('add_features','TEST',test_x);
sg('add_features','TEST',test_x);
sg('add_features','TEST',test_x);
sg('add_features','TEST',test_x);
sg('add_features','TEST',test_x);
sg('set_labels','TEST',test_y);
sg('set_threshold', 0);
result.testout(kk,:)=sg('classify');
result.testerr(kk) = mean(test_y~=sign(result.testout(kk,:)),2);
end
disp('done. now w contains the kernel weightings and result test/train outputs and errors')
% Explicit examples on how to use the different classifiers
size_cache=10;
C=1.2;
use_bias=false;
epsilon=1e-5;
width=1.2;
mkl_eps=0.001;
mkl_norm=2;
max_train_time=600;
addpath('tools');
label_train_multiclass=load_matrix('../data/label_train_multiclass.dat');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
% MKL_MULTICLASS
disp('MKL_MULTICLASS');
sg('new_classifier', 'MKL_MULTICLASS');
disp('Combined');
sg('clean_kernel');
sg('clean_features','TRAIN');
sg('clean_features','TEST');
sg('set_kernel', 'COMBINED', size_cache);
sg('add_kernel', 1, 'LINEAR', 'REAL', size_cache);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', size_cache, 1);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('add_kernel', 1, 'POLY', 'REAL', size_cache, 2);
sg('add_features', 'TRAIN', fm_train_real);
sg('add_features', 'TEST', fm_test_real);
sg('set_labels', 'TRAIN', label_train_multiclass);
sg('svm_epsilon', epsilon);
sg('c', C);
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('train_classifier');
result=sg('classify');
result
% This script should enable you to rerun the experiment in the
% paper that we labeled "mixture linear and sine".
%
% The task is to learn a regression function where the true function
% is given by a mixture of 2 sine waves in addition to a linear trend.
% We vary the frequency of the second higher frequency sine wave.
% Setup: MKL on 10 RBF kernels of different widths on 1000 examples
% Preliminary setting
% kernel width for 10 basic SVMs
rbf_width(1) = 0.001;
rbf_width(2) = 0.005;
rbf_width(3) = 0.01;
rbf_width(4) = 0.05;
rbf_width(5) = 0.1;
rbf_width(6) = 1;
rbf_width(7) = 10;
rbf_width(8) = 50;
rbf_width(9) = 100;
rbf_width(10) = 1000;
mkl_norm = 1; % >=1
% SVM parameter
C = 1;
cache_size = 50;
mkl_eps = 1e-4;
svm_eps = 1e-4;
svr_tube = 0.01;
debug = 0;
% data
f = [0:20]; % parameter that varies the frequency of the second sine wave
no_obs = 20; % number of observations
if debug
sg('loglevel', 'ALL');
sg('echo', 'ON');
else
sg('loglevel', 'ERROR');
sg('echo', 'OFF');
end
for kk = 1:length(f) % Big loop
% data generation
train_x = [0:((4*pi)/(no_obs-1)):4*pi];
trend = 2 * train_x * (pi/(max(train_x)-min(train_x)));
wave1 = sin(train_x);
wave2 = sin(f(kk)*train_x);
train_y = trend + wave1 + wave2;
% MKL learning
kernels={};
sg('new_classifier', 'MKL_REGRESSION');
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('mkl_use_interleaved_optimization', 1); % 0, 1
sg('set_solver', 'DIRECT'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
sg('c', C);
sg('svm_epsilon',svm_eps);
sg('svr_tube_epsilon',svr_tube);
sg('clean_features', 'TRAIN');
sg('clean_kernel');
sg('set_labels', 'TRAIN', train_y); % set labels
sg('add_features','TRAIN', train_x); % add features for every basic SVM
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('set_kernel', 'COMBINED', 0);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(1));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(2));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(3));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(4));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(5));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(6));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(7));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(8));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(9));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(10));
sg('train_regression');
weights(kk,:) = sg('get_subkernel_weights');
fprintf('frequency: %02.2f rbf-kernel-weights: %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f %02.2f \n', f(kk), weights(kk,:))
end
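% A minimal plotting sketch (plain Octave): each column of weights
% corresponds to one RBF width, so plotting them against f shows the
% weight mass moving towards narrower kernels as the second sine wave
% gets faster.
figure;
plot(f, weights);
xlabel('frequency of the second sine wave');
ylabel('subkernel weight');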
% This script should enable you to rerun the experiment in the
% paper that we labeled "sine".
%
% In this regression task a sine wave is to be learned.
% We vary the frequency of the wave.
% Preliminary settings:
% Parameter for the SVMs.
C = 10; % obtained via model selection (not included in the script)
cache_size = 10;
mkl_eps = 1e-4; % threshold for precision
svm_eps = 1e-4;
svr_tube_eps = 1e-3;
debug = 0;
% Kernel width for the 5 "basic" SVMs
rbf_width(1) = 0.005;
rbf_width(2) = 0.05;
rbf_width(3) = 0.5;
rbf_width(4) = 1;
rbf_width(5) = 10;
mkl_norm = 1; % >=1
% data
f = [0.1:0.2:5]; % values for the different frequencies
no_obs = 100; % number of observations
if debug
sg('loglevel', 'ALL');
sg('echo', 'ON');
else
sg('loglevel', 'ERROR');
sg('echo', 'OFF');
end
for kk = 1:length(f) % big loop for the different learning problems
% data generation
train_x = [1:(((10*2*pi)-1)/(no_obs-1)):10*2*pi];
train_y = sin(f(kk)*train_x);
kernels={};
% initialize MKL-SVR
sg('new_regression', 'MKL_REGRESSION');
sg('mkl_parameters', mkl_eps, 0, mkl_norm);
sg('mkl_use_interleaved_optimization', 1); % 0, 1
sg('set_solver', 'GLPK'); % DIRECT, NEWTON, CPLEX, AUTO, GLPK, ELASTICNET
sg('c', C);
sg('svm_epsilon', svm_eps);
sg('svr_tube_epsilon', svr_tube_eps);
sg('clean_features', 'TRAIN');
sg('clean_kernel');
sg('set_labels', 'TRAIN', train_y); % set labels
sg('add_features','TRAIN', train_x); % add features for every SVR
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('add_features','TRAIN', train_x);
sg('set_kernel', 'COMBINED', 0);
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(1));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(2));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(3));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(4));
sg('add_kernel', 1, 'GAUSSIAN', 'REAL', cache_size, rbf_width(5));
sg('train_regression');
weights(kk,:) = sg('get_subkernel_weights');
fprintf('frequency: %02.2f rbf-kernel-weights: %02.2f %02.2f %02.2f %02.2f %02.2f \n', f(kk), weights(kk,:))
end
% In this example a kernel matrix is computed for a given real-valued data set.
% The kernel used is the Chi2 kernel which operates on real-valued vectors. It
% computes the chi-squared distance between sets of histograms. It is a very
% useful distance in image recognition (used to detect objects). The preprocessor
% LogPlusOne adds one to each component of a dense real-valued vector and takes
% its logarithm. It is most useful when the inputs are counts: when comparing
% small counts, any difference may matter a lot, while small differences between
% large counts don't. This is what the log transformation controls for.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
width=1.4;
% LogPlusOne
disp('LogPlusOne');
sg('add_preproc', 'LOGPLUSONE');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
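% The effect of LogPlusOne can be reproduced by hand (a plain Octave
% sketch, independent of shogun): every entry x is mapped to log(1+x),
% which keeps small counts distinguishable while damping differences
% between large counts.
counts=[0 1 2 100 101 102];
log(1+counts)
% the gap 0->1 maps to ~0.69, while the gap 100->101 maps to ~0.01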
% In this example a kernel matrix is computed for a given real-valued data set.
% The kernel used is the Chi2 kernel which operates on real-valued vectors. It
% computes the chi-squared distance between sets of histograms. It is a very
% useful distance in image recognition (used to detect objects). The preprocessor
% NormOne normalizes each feature vector to unit norm.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
width=1.4;
% NormOne
disp('NormOne');
sg('add_preproc', 'NORMONE');
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
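% NormOne rescales every example (column) to unit Euclidean norm; a
% plain Octave sketch of the same operation:
X=rand(3,5);
Xn=X./repmat(sqrt(sum(X.^2,1)),size(X,1),1);
sqrt(sum(Xn.^2,1)) % every column now has norm 1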
% In this example a kernel matrix is computed for a given real-valued data set.
% The kernel used is the Chi2 kernel which operates on real-valued vectors. It
% computes the chi-squared distance between sets of histograms. It is a very
% useful distance in image recognition (used to detect objects). The preprocessor
% PruneVarSubMean subtracts the mean from each feature and removes features that
% have zero variance.
size_cache=10;
addpath('tools');
fm_train_real=load_matrix('../data/fm_train_real.dat');
fm_test_real=load_matrix('../data/fm_test_real.dat');
width=1.4;
% PruneVarSubMean
disp('PruneVarSubMean');
divide_by_std=true;
sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std);
sg('set_kernel', 'CHI2', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train_real);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_real);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
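% PruneVarSubMean can be mimicked in plain Octave (a sketch; the actual
% preprocessor additionally divides by the standard deviation when
% divide_by_std is set, as above): subtract the per-feature mean and
% drop zero-variance features.
X=[1 1 1; 2 4 6]; % the first feature (row) has zero variance
keep=var(X,0,2)>0;
Xp=X(keep,:)-repmat(mean(X(keep,:),2),1,size(X,2))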
% In this example a kernel matrix is computed for a given string data set. The
% CommUlongString kernel is used to compute the spectrum kernel from strings that
% have been mapped into unsigned 64bit integers. These 64bit integers correspond
% to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
% This is done using the SortUlongString preprocessor, which sorts the individual
% strings in ascending order. The kernel function basically uses the algorithm in
% the unix "comm" command (hence the name). Note that this representation enables
% spectrum kernels of order 8 for 8bit alphabets (like binaries) and order 32 for
% 2-bit alphabets like DNA. For this kernel the linadd speedups are implemented
% (though there is room for improvement here when a whole set of sequences is
% ADDed) using sorted lists.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
width=1.4;
%
% complex string features
%
order=3;
gap=0;
reverse='n'; % a char flag rather than a boolean: use 'r' instead of 'n' to enable reversal
use_sign=false;
normalization='FULL';
% SortUlongString
disp('CommUlongString');
sg('add_preproc', 'SORTULONGSTRING');
sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
% In this example a kernel matrix is computed for a given string data set. The
% CommWordString kernel is used to compute the spectrum kernel from strings that
% have been mapped into unsigned 16bit integers. These 16bit integers correspond
% to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
% This is done using the SortWordString preprocessor, which sorts the individual
% strings in ascending order. The kernel function basically uses the algorithm in
% the unix "comm" command (hence the name). Note that this representation is
% especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it
% enables spectrum kernels of order up to 8. For this kernel the linadd speedups
% are quite efficiently implemented using direct maps.
size_cache=10;
addpath('tools');
fm_train_dna=load_matrix('../data/fm_train_dna.dat');
fm_test_dna=load_matrix('../data/fm_test_dna.dat');
width=1.4;
order=3;
gap=0;
reverse='n';
use_sign=false;
normalization='FULL';
% SortWordString
disp('CommWordString');
sg('add_preproc', 'SORTWORDSTRING');
sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization);
sg('set_features', 'TRAIN', fm_train_dna, 'DNA');
sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TRAIN');
km=sg('get_kernel_matrix', 'TRAIN');
sg('set_features', 'TEST', fm_test_dna, 'DNA');
sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse);
sg('attach_preproc', 'TEST');
km=sg('get_kernel_matrix', 'TEST');
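% The spectrum kernel computed by CommUlongString/CommWordString can be
% written out naively for two short strings (a plain Octave sketch with
% k=3; shogun instead walks sorted integer k-mer lists, comm-style,
% which is far more efficient):
x='ACGTACGT'; y='ACGTTTTT'; k=3;
kx={}; ky={};
for i=1:length(x)-k+1, kx{end+1}=x(i:i+k-1); end
for i=1:length(y)-k+1, ky{end+1}=y(i:i+k-1); end
u=unique([kx ky]);
cx=cellfun(@(m) sum(strcmp(kx,m)), u);
cy=cellfun(@(m) sum(strcmp(ky,m)), u);
kxy=cx*cy' % unnormalized spectrum kernel value: shared k-mer occurrences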
% In this example a kernelized version of ridge regression (KRR) is trained on a
% real-valued data set. The KRR is trained with regularization parameter tau=1.2
% and a Gaussian kernel of width 2.1.
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
% KRR
disp('KRR');
tau=1.2;
sg('set_features', 'TRAIN', fm_train);
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_labels', 'TRAIN', label_train);
sg('new_regression', 'KRR');
sg('krr_tau', tau);
sg('c', C);
sg('train_regression');
sg('set_features', 'TEST', fm_test);
result=sg('classify');
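% KRR admits the closed-form solution alpha = (K + tau*I) \ y, which
% allows a quick cross-check of the result above in plain Octave (a
% sketch, up to implementation details such as a bias term;
% get_kernel_matrix is used as in the preprocessor examples above):
km_train=sg('get_kernel_matrix', 'TRAIN');
alpha=(km_train + tau*eye(size(km_train))) \ label_train';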
% In this example a support vector regression algorithm is trained on a
% real-valued toy data set. The underlying library used for the SVR training is
% LIBSVM. The SVR is trained with regularization parameter C=1.2 and a Gaussian
% kernel of width 2.1.
%
% For more details on the LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ .
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
% LibSVR
disp('LibSVR');
sg('set_features', 'TRAIN', fm_train);
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_labels', 'TRAIN', label_train);
sg('new_regression', 'LIBSVR');
sg('svr_tube_epsilon', tube_epsilon);
sg('c', C);
sg('train_regression');
sg('set_features', 'TEST', fm_test);
result=sg('classify');
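% svr_tube_epsilon sets the width of the epsilon-insensitive tube: only
% deviations larger than the tube are penalized. The loss itself is easy
% to write down in plain Octave (a sketch):
r=linspace(-0.05,0.05,11); % residuals y - f(x)
loss=max(0,abs(r)-tube_epsilon) % zero inside the tube, linear outside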
% In this example a support vector regression algorithm is trained on a
% real-valued toy data set. The underlying library used for the SVR training is
% SVM^light. The SVR is trained with regularization parameter C=1.2 and a Gaussian
% kernel of width 2.1.
%
% For more details on SVM^light see
% T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
% Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
size_cache=10;
width=2.1;
C=1.2;
tube_epsilon=1e-2;
addpath('tools');
label_train=load_matrix('../data/label_train_twoclass.dat');
fm_train=load_matrix('../data/fm_train_real.dat');
fm_test=load_matrix('../data/fm_test_real.dat');
% SVR Light
try
disp('SVRLight');
sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width);
sg('set_features', 'TRAIN', fm_train);
sg('set_labels', 'TRAIN', label_train);
sg('new_regression', 'SVRLIGHT');
sg('svr_tube_epsilon', tube_epsilon);
sg('c', C);
sg('train_regression');
sg('set_features', 'TEST', fm_test);
result=sg('classify');
catch
disp('No support for SVRLight available.')
end
% In this example we use the dynamic programming implementation with a
% gene-finding-specific model. The model and the training parameters
% are stored in a file and are used to create a gene prediction on
% an example sequence.
%% load data
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
load('-mat', '../data/DynProg_example.dat')
%% set a number of defaults
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
use_orf = 1;
num_svms = 8;
use_long_transitions = 1;
threshold = 1000;
long_transition_max_len = 100000;
block.content_pred(end+1:num_svms,:) = 0; % pad content predictions with zero rows up to num_svms
viterbi_nbest = [1 0];
%% reshape the training parameters and additional information like
%% length constraints and transformation type and pass them to shogun
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
for j=1:length(penalty_array)
all_ids(j) = penalty_array{j}.id;
all_names{j} = penalty_array{j}.name;
all_limits(:,j) = penalty_array{j}.limits;
all_penalties(:,j) = penalty_array{j}.penalties;
if isempty(penalty_array{j}.transform)
all_transform{j} = 'linear';
else
all_transform{j} = penalty_array{j}.transform;
end
all_min_values(j) = penalty_array{j}.min_value;
all_max_values(j) = penalty_array{j}.max_value;
all_use_cache(j) = penalty_array{j}.use_cache;
all_use_svm(j) = penalty_array{j}.use_svm;
all_do_calc(j) = 1;
end
sg('set_plif_struct',int32(all_ids)-1,all_names, all_limits, all_penalties, all_transform,...
all_min_values, all_max_values, int32(all_use_cache), int32(all_use_svm), int32(all_do_calc));
%% pass the data to shogun
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
sg('init_dyn_prog', num_svms);
sg('set_lin_feat', block.seq, int32(block.all_pos-1), block.content_pred);
sg('set_model', model.transition_pointers, use_orf, int32(model.mod_words), int32(state_signals), int32(model.orf_info));
sg('set_feature_matrix', block.features);
sg('long_transition_settings', use_long_transitions, threshold, long_transition_max_len);
%% run the dynamic program
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
[path_scores, path, ppos]= sg('best_path_trans', model.p', model.q', int32(viterbi_nbest), seg_path, a_trans, loss);
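% A minimal look at the prediction (a sketch; the variable names are
% those returned by the call above): path holds the best state
% sequence, ppos the corresponding positions and path_scores the score.
fprintf('best path score: %g\n', path_scores(1));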