This page lists ready-to-run Shogun examples for the R Modular interface.
To run an example, issue
R -f name_of_example.R
or start R and then type
source('name_of_example.R')
# In this example a multi-class support vector machine is trained on a toy data
# set and the trained classifier is then used to predict labels of test
# examples. The training algorithm is based on the BSVM formulation (L2-soft margin
# and the bias added to the objective function), which is solved by the Improved
# Mitchell-Demyanov-Malozemov algorithm. The training uses a Gaussian kernel of
# width 2.1 and the regularization constant C=1.3. The solver stops when the
# relative duality gap falls below 1e-5.
#
# For more details on the used SVM solver see
# V.Franc: Optimization Algorithms for Kernel Methods. Research report.
# CTU-CMP-2005-22. CTU FEL Prague. 2005.
# ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
#
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_multiclass <- as.real(read.table('../data/label_train_multiclass.dat')$V1)
# gmnpsvm
print('GMNPSVM')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 1.3
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- Labels(label_train_multiclass)
svm <- GMNPSVM(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is then used to predict labels of test
# examples. As training algorithm the Gradient Projection Decomposition Technique
# (GPDT) is used with the SVM regularization parameter C=0.017 and a Gaussian
# kernel of width 2.1. The solver returns an epsilon-precise (epsilon=1e-5) solution.
#
# For more details on GPDT solver see http://dm.unife.it/gpdt .
#
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# gpbtsvm
print('GPBTSVM')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 0.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- Labels(label_train_twoclass)
svm <- GPBTSVM(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# This example shows usage of a k-nearest neighbor (KNN) classification rule on
# a toy data set. The number of nearest neighbors is set to k=3 and the distances
# are measured by the Euclidean metric. Finally, the KNN rule is applied to predict
# labels of test examples.
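# For intuition, the same rule is a few lines of plain R. The function below is
# a didactic sketch (train: features x examples, labels numeric), not shogun's
# implementation:
knn_predict <- function(train, labels, test, k=3) {
  apply(test, 2, function(x) {
    d <- colSums((train - x)^2)                       # squared Euclidean distances
    nn <- order(d)[1:k]                               # indices of the k nearest neighbors
    as.numeric(names(which.max(table(labels[nn]))))   # majority vote among neighbors
  })
}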
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_multiclass <- as.real(read.table('../data/label_train_multiclass.dat')$V1)
# knn
print('KNN')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
distance <- EuclidianDistance()
k <- as.integer(3)
num_threads <- as.integer(1)
labels <- Labels(label_train_multiclass)
knn <- KNN(k, distance, labels)
dump <- knn$parallel$set_num_threads(knn$parallel, num_threads)
dump <- knn$train(knn, feats_train)
lab <- knn$classify(knn, feats_test)
out <- lab$get_labels(lab)
# In this example a two-class linear classifier based on the Linear Discriminant
# Analysis (LDA) is trained on a toy data set and then the trained classifier is
# used to predict labels of test examples. The regularization parameter, which
# corresponds to the weight of an identity matrix added to the covariance matrix,
# is set to gamma=3.
#
# For more details on the LDA see e.g.
# http://en.wikipedia.org/wiki/Linear_discriminant_analysis
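# The construction can be sketched in plain R: with classes y in {-1,+1} the
# regularized discriminant direction is w = (S_w + gamma*I)^-1 (mu_pos - mu_neg).
# This is a didactic sketch; shogun's exact scaling and bias conventions may differ:
lda_direction <- function(X, y, gamma) {
  Xp <- X[, y > 0, drop=FALSE]                   # X: features x examples
  Xn <- X[, y < 0, drop=FALSE]
  mu_p <- rowMeans(Xp)
  mu_n <- rowMeans(Xn)
  Sw <- cov(t(Xp)) + cov(t(Xn))                  # within-class scatter (up to scaling)
  solve(Sw + gamma * diag(nrow(X)), mu_p - mu_n) # regularized direction w
}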
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# lda
print('LDA')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
gamma <- 3
labels <- Labels(label_train_twoclass)
lda <- LDA(gamma, feats_train, labels)
dump <- lda$train(lda)
dump <- lda$get_bias(lda)
dump <- lda$get_w(lda)
dump <- lda$set_features(lda, feats_test)
lab <- lda$classify(lda)
out <- lab$get_labels(lab)
# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is then used to predict labels of
# test examples. As training algorithm the LIBLINEAR solver is used with the SVM
# regularization parameter C=1.42, the bias in the classification rule switched
# on, and the precision parameter epsilon=1e-5.
#
# For more details on LIBLINEAR see
# http://www.csie.ntu.edu.tw/~cjlin/liblinear/
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# liblinear
print('LibLinear')
realfeat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, realfeat)
realfeat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, realfeat)
C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- Labels(label_train_twoclass)
svm <- LibLinear(C, feats_train, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$set_bias_enabled(svm, TRUE)
dump <- svm$train(svm)
dump <- svm$set_features(svm, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the LIBSVM solver is used with the SVM
# regularization parameter C=1.017, a Gaussian kernel of width 2.1, and the
# precision parameter epsilon=1e-5.
#
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# libsvm
print('LibSVM')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- Labels(label_train_twoclass)
svm <- LibSVM(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a multi-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the LIBSVM solver is used with the SVM
# regularization parameter C=1.017, a Gaussian kernel of width 2.1, and the
# precision parameter epsilon=1e-5.
#
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_multiclass <- as.real(read.table('../data/label_train_multiclass.dat')$V1)
# libsvmmulticlass
print('LibSVMMultiClass')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- Labels(label_train_multiclass)
svm <- LibSVMMultiClass(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a one-class support vector machine classifier is trained on a
# toy data set. The training algorithm finds a hyperplane in the RKHS which
# separates the training data from the origin. The one-class classifier is
# typically used to estimate the support of a high-dimensional distribution.
# For more details see e.g.
# B. Schoelkopf et al. Estimating the support of a high-dimensional
# distribution. Neural Computation, 13, 2001, 1443-1471.
#
# In the example, the one-class SVM is trained by the LIBSVM solver with the
# regularization parameter C=1.017, a Gaussian kernel of width 2.1, and the
# precision parameter epsilon=1e-5.
#
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# libsvm oneclass
print('LibSVMOneClass')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(4)
svm <- LibSVMOneClass(C, kernel)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the Minimal Primal Dual (MPD) SVM is used with
# the SVM regularization parameter C=1.2, a Gaussian kernel of width 2.1, and the
# precision parameter epsilon=1e-5.
#
# For more details on the MPD solver see
# Kienzle, W. and B. Schölkopf: Training Support Vector Machines with Multiple
# Equality Constraints. Machine Learning: ECML 2005, 182-193. (Eds.) Carbonell,
# J. G., J. Siekmann, Springer, Berlin, Germany (11 2005)
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# mpdsvm
print('MPDSVM')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)
C <- 1.2
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- Labels(label_train_twoclass)
svm <- MPDSVM(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# This example shows usage of the Perceptron algorithm for training a two-class
# linear classifier, i.e. y = sign( <x,w>+b). The Perceptron algorithm works by
# iteratively passing through the training examples and applying the update rule to
# those examples which are misclassified by the current classifier. The Perceptron
# update rule reads
#
# w(t+1) = w(t) + alpha * y_t * x_t
# b(t+1) = b(t) + alpha * y_t
#
# where (x_t,y_t) are the feature vector and label (must be +1/-1) of the misclassified example,
# (w(t),b(t)) are the current parameters of the linear classifier,
# (w(t+1),b(t+1)) are the updated parameters of the linear classifier, and
# alpha is the learning rate (alpha=1 in this example).
#
# The Perceptron algorithm iterates until all training examples are correctly
# classified or the prescribed maximal number of iterations, in this example
# max_iter=1000, is reached.
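# The update rule is simple enough to restate in plain R; the following didactic
# sketch (independent of shogun) mirrors the loop described above:
perceptron_train <- function(X, y, alpha=1, max_iter=1000) {
  w <- rep(0, nrow(X))                           # X: features x examples, y in {-1,+1}
  b <- 0
  for (iter in 1:max_iter) {
    mistakes <- 0
    for (t in 1:ncol(X)) {
      if (y[t] * (sum(w * X[,t]) + b) <= 0) {    # example t is misclassified
        w <- w + alpha * y[t] * X[,t]            # w(t+1) = w(t) + alpha * y_t * x_t
        b <- b + alpha * y[t]                    # b(t+1) = b(t) + alpha * y_t
        mistakes <- mistakes + 1
      }
    }
    if (mistakes == 0) break                     # all training examples classified correctly
  }
  list(w=w, b=b)
}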
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# perceptron
print('Perceptron')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
learn_rate <- 1.
max_iter <- as.integer(1000)
num_threads <- as.integer(1)
labels <- Labels(label_train_twoclass)
perceptron <- Perceptron(feats_train, labels)
dump <- perceptron$set_learn_rate(perceptron, learn_rate)
dump <- perceptron$set_max_iter(perceptron, max_iter)
dump <- perceptron$train(perceptron)
dump <- perceptron$set_features(perceptron, feats_test)
lab <- perceptron$classify(perceptron)
out <- lab$get_labels(lab)
# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the steepest descent subgradient algorithm is
# used. The SVM regularization parameter is set to C=1.42 and the bias in the
# classification rule is switched off, i.e. an unbiased linear rule is trained.
# The solver iterates until it finds an epsilon-precise solution (epsilon=1e-3)
# or the maximal training time max_train_time=1 (seconds) is exceeded.
#
# Note that this solver often does not converge because the steepest descent
# subgradient algorithm is oversensitive to rounding errors. Note also that this
# is unpublished work which was a predecessor of the OCAS solver (see
# classifier_svmocas).
# classifier_svmocas).
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# subgradient based svm
print('SubGradientSVM')
realfeat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, realfeat)
realfeat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, realfeat)
C <- 1.42
epsilon <- 1e-3
num_threads <- as.integer(1)
max_train_time <- 1.
labels <- Labels(label_train_twoclass)
svm <- SubGradientSVM(C, feats_train, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$set_bias_enabled(svm, FALSE)
dump <- svm$set_max_train_time(svm, max_train_time)
dump <- svm$train(svm)
dump <- svm$set_features(svm, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and the trained classifier is used to predict
# labels on a test set. As training algorithm SVM^light is used with the SVM
# regularization parameter C=1.017, the Weighted Degree kernel of degree 20, and
# the precision parameter epsilon=1e-5.
#
# For more details on the SVM^light see
# T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
# Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
#
# For more details on the Weighted Degree kernel see
# G. Raetsch, S.Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
# spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
label_train_dna <- as.real(read.table('../data/label_train_dna.dat')$V1)
# svm light
dosvmlight <- function()
{
print('SVMLight')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
degree <- as.integer(20)
kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(3)
labels <- Labels(as.real(label_train_dna))
svm <- SVMLight(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
}
try(dosvmlight())
# In this example a two-class linear support vector machine classifier (SVM) is
# trained on a toy data set and the trained classifier is used to predict labels
# of test examples. As training algorithm the SVMLIN solver is used with the SVM
# regularization parameter C=1.42, the bias in the classification rule switched
# on, and the precision parameter epsilon=1e-5. The example also shows how to
# retrieve the parameters (weight vector w and bias b) of the trained linear
# classifier.
#
# For more details on the SVMLIN solver see
# V. Sindhwani, S.S. Keerthi. Newton Methods for Fast Solution of Semi-supervised
# Linear SVMs. Large Scale Kernel Machines MIT Press (Book Chapter), 2007
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# svm lin
print('SVMLin')
realfeat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, realfeat)
realfeat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, realfeat)
C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- Labels(label_train_twoclass)
svm <- SVMLin(C, feats_train, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$set_bias_enabled(svm, TRUE)
dump <- svm$train(svm)
dump <- svm$set_features(svm, feats_test)
dump <- svm$get_bias(svm)
dump <- svm$get_w(svm)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the OCAS solver is used with the SVM
# regularization parameter C=1.42, the bias term in the classification rule
# switched off, and the precision parameter epsilon=1e-5 (duality gap).
#
# For more details on the OCAS solver see
# V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale Risk
# Minimization.The Journal of Machine Learning Research, vol. 10,
# pp. 2157--2192. October 2009.
#
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# svm ocas
print('SVMOcas')
realfeat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, realfeat)
realfeat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, realfeat)
C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- Labels(label_train_twoclass)
svm <- SVMOcas(C, feats_train, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$set_bias_enabled(svm, FALSE)
dump <- svm$train(svm)
dump <- svm$set_features(svm, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the Stochastic Gradient Descent (SGD) solver is
# used with the SVM regularization parameter C=2.3. The number of iterations,
# i.e. passes through all training examples, is set to num_iter=5.
#
# For more details on the SGD solver see
# L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20. MIT
# Press. 2008.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_twoclass <- as.real(read.table('../data/label_train_twoclass.dat')$V1)
# sgd
print('SVMSGD')
realfeat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, realfeat)
realfeat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, realfeat)
C <- 2.3
num_threads <- as.integer(1)
labels <- Labels(label_train_twoclass)
svm <- SVMSGD(C, feats_train, labels)
#dump <- svm$io$set_loglevel(svm$io, 0)
num_iter <- as.integer(5)
dump <- svm$set_epochs(svm, num_iter)
dump <- svm$train(svm)
dump <- svm$set_features(svm, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
# Histogram
print('Histogram')
order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(feats, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, feats)
dump <- feats$add_preproc(feats, preproc)
dump <- feats$apply_preproc(feats)
histo <- Histogram(feats)
dump <- histo$train(histo)
dump <- histo$get_histogram()
num_examples <- feats$get_num_vectors()
num_param <- histo$get_num_model_parameters()
# commented out as this is quite time consuming
#derivs=matrix(0,num_param, num_examples)
#for (i in 0:(num_examples-1))
#{
# for (j in 0:(num_param-1))
# {
# derivs[j,i]=histo$get_log_derivative(histo, j, i)
# }
#}
dump <- histo$get_log_likelihood(histo)
dump <- histo$get_log_likelihood_sample()
# In this example a hidden Markov model with 3 states and 6 observation symbols
# is trained on a string data set. After calling the constructor of the HMM class,
# specifying the number of states and observation symbols, the model is trained.
# Via the Baum-Welch algorithm the optimal transition and emission probabilities
# are estimated. The best path, i.e. the path with the highest probability given
# the model, can then be calculated using get_best_path_state.
library(shogun)
fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))
# HMM
print('HMM')
N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
num_examples <- as.integer(2)
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(charfeat, fm_train_cube)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(feats, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, feats)
dump <- feats$add_preproc(feats, preproc)
dump <- feats$apply_preproc(feats)
hmm <- HMM(feats, N, M, pseudo)
dump <- hmm$train(hmm)
dump <- hmm$baum_welch_viterbi_train(hmm, "BW_NORMAL")
num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1))
{
for (j in 0:(num_param-1))
{
derivs[j,i] <- hmm$get_log_derivative(hmm, j, i)
}
}
best_path <- 0
best_path_state <- 0
for (i in 0:(num_examples-1))
{
best_path <- best_path + hmm$best_path(hmm, i)
for (j in 0:(N-1))
{
best_path_state <- best_path_state + hmm$get_best_path_state(hmm, i, j)
}
}
dump <- hmm$get_log_likelihood(hmm)
dump <- hmm$get_log_likelihood_sample()
# Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due to
# the structure of the Markov chain it is very similar to an HMM with just one
# chain of connected hidden states, which is why we termed it a linear HMM.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
# Linear HMM
print('LinearHMM')
order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(feats, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, feats)
dump <- feats$add_preproc(feats, preproc)
dump <- feats$apply_preproc(feats)
hmm <- LinearHMM(feats)
dump <- hmm$train(hmm)
dump <- hmm$get_transition_probs()
num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1))
{
for (j in 0:(num_param-1))
{
derivs[j,i] <- hmm$get_log_derivative(hmm, j, i)
}
}
dump <- hmm$get_log_likelihood(hmm)
dump <- hmm$get_log_likelihood_sample()
# This example demonstrates the use of the AUC Kernel.
library(shogun)
fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))
# auc
#print('AUC')
#
#feats_train <- RealFeatures(fm_train_real)
#feats_test <- RealFeatures(fm_test_real)
#width <- 1.7
#subkernel <- GaussianKernel(feats_train, feats_test, width)
#
#num_feats <- 2 # do not change!
#len_train <- 11
#len_test <- 17
#data <- matrix(as.integer(runif(num_feats*len_train, 0, len_train-1)), num_feats, len_train)
#feats_train <- WordFeatures(data)
#data <- matrix(as.integer(runif(num_feats*len_test, 0, len_test-1)), num_feats, len_test)
#feats_test <- WordFeatures(data)
#
#kernel <- AUCKernel(feats_train, feats_test, subkernel)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the chi2-kernel on real data, where
# each column of the matrices corresponds to one training/test example.
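# For reference, the chi2 kernel between two nonnegative vectors x and y is
# commonly defined as k(x,y) = exp(-(1/width) * sum((x_i-y_i)^2/(x_i+y_i))).
# A plain R sketch of that definition (assumed convention, not taken from the
# library source):
chi2_k <- function(x, y, width) {
  num <- (x - y)^2
  den <- x + y
  exp(-sum(ifelse(den > 0, num / den, 0)) / width)  # skip terms with x_i + y_i = 0
}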
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# chi2
print('Chi2')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 1.4
size_cache <- as.integer(10)
kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a combined kernel, which is a
# weighted sum of, in this case, three kernels on real-valued and DNA string data.
# The sub-kernel weights are all set to 1.
#
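# Conceptually the combined kernel evaluates k(x,y) = sum_j beta_j * k_j(x,y), so
# for precomputed kernel matrices it is just a weighted sum. A didactic plain R
# sketch (beta and the matrix list are placeholders):
combine_kms <- function(kms, beta) Reduce(`+`, Map(`*`, beta, kms))
# e.g. combine_kms(list(km1, km2, km3), c(1, 1, 1)) for equal sub-kernel weights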
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
fm_train_dna <- t(as.matrix(read.table('../data/fm_train_dna.dat')))
fm_test_dna <- t(as.matrix(read.table('../data/fm_test_dna.dat')))
# combined
print('Combined')
kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()
subkfeats_train <- RealFeatures(fm_train_real)
subkfeats_test <- RealFeatures(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.6)
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
subkfeats_train <- StringCharFeatures("DNA")
dump <- subkfeats_train$set_features(subkfeats_train, fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
dump <- subkfeats_test$set_features(subkfeats_test, fm_test_dna)
degree <- as.integer(3)
subkernel <- FixedDegreeStringKernel(as.integer(10), degree)
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
subkfeats_train <- StringCharFeatures("DNA")
dump <- subkfeats_train$set_features(subkfeats_train, fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
dump <- subkfeats_test$set_features(subkfeats_test, fm_test_dna)
subkernel <- LocalAlignmentStringKernel(as.integer(10))
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
dump <- kernel$init(kernel, feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommUlongString kernel. This
# kernel sums over k-mer matches (k='order'). For efficient computation a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# to TRUE each k-mer is counted only once.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# comm_ulong_string
print('CommUlongString')
order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats_train <- StringUlongFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(feats_train, charfeat, start, order, gap, reverse)
preproc <- SortUlongString()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_test_dna)
feats_test <- StringUlongFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(feats_test, charfeat, start, order, gap, reverse)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
use_sign <- FALSE
kernel <- CommUlongStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommWordString kernel (aka
# Spectrum or n-gram kernel; its name is derived from the unix command comm). This
# kernel sums over k-mer matches (k='order'). For efficient computation a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# to TRUE each k-mer is counted only once.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# comm_word_string
print('CommWordString')
order <- as.integer(3)
gap <- as.integer(0)
start <- as.integer(order-1)
reverse <- FALSE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(feats_train, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(feats_test, charfeat, start, order, gap, reverse)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
use_sign <- FALSE
kernel <- CommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()
kernel <- CommWordStringKernel(feats_train, feats_test, use_sign)
km_test <- kernel$get_kernel_matrix()
# The constant kernel gives a trivial kernel matrix with all entries set to the same value
# defined by the argument 'c'.
#
library(shogun)
fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))
# const
print('Const')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
c <- 23.
kernel <- ConstKernel(feats_train, feats_train, c)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# A user-defined custom kernel is assigned in this example. The kernel matrix may
# be given as the lower triangle only (set_triangle_kernel_matrix_from_triangle),
# as a full matrix (set_full_kernel_matrix_from_full), or as a full matrix which is
# then internally stored as a triangle (set_triangle_kernel_matrix_from_full).
# Labels for the examples are given, an SVM is trained and the SVM is then used to
# classify the examples.
#
library(shogun)
## custom
#print('Custom')
#
#dim <- 7
#data <- matrix(runif(dim*dim), dim, dim)
#feats <- RealFeatures(data)
#symdata <- data + t(data)
#lowertriangle <- symdata[lower.tri(symdata, diag=TRUE)]
#
#kernel <- CustomKernel(feats, feats)
#
#kernel$set_triangle_kernel_matrix_from_triangle(kernel, lowertriangle)
#km_triangletriangle <- kernel$get_kernel_matrix()
#
#kernel$set_triangle_kernel_matrix_from_full(kernel, symdata)
#km_fulltriangle <- kernel$get_kernel_matrix()
#
#kernel$set_full_kernel_matrix_from_full(kernel, data)
#km_fullfull <- kernel$get_kernel_matrix()
# This is an example for the initialization of the diag-kernel.
# The diag kernel has all kernel matrix entries but those on
# the main diagonal set to zero.
library(shogun)
fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))
# diag
print('Diag')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
diag <- 23.
kernel <- DiagKernel(feats_train, feats_train, diag)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# With the distance kernel one can use any of the following distance metrics:
# BrayCurtisDistance()
# CanberraMetric()
# CanberraWordDistance()
# ChebyshewMetric()
# ChiSquareDistance()
# CosineDistance()
# Distance()
# EuclidianDistance()
# GeodesicMetric()
# HammingWordDistance()
# JensenMetric()
# ManhattanMetric()
# ManhattanWordDistance()
# MinkowskiMetric()
# RealDistance()
# SimpleDistance()
# SparseDistance()
# SparseEuclidianDistance()
# StringDistance()
# TanimotoDistance()
#
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# distance
print('Distance')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 1.7
distance <- EuclidianDistance()
kernel <- DistanceKernel(feats_train, feats_train, width, distance)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The FixedDegreeString kernel takes as input two strings of the same length and counts the number of matching substrings of length d.
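# Equivalently in plain R (didactic sketch): count the positions l at which the
# length-d substrings of X and Y coincide:
fixed_degree_k <- function(X, Y, d) {
  sum(sapply(1:(nchar(X) - d + 1),
             function(l) substr(X, l, l + d - 1) == substr(Y, l, l + d - 1)))
}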
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# fixed_degree_string
print('FixedDegreeString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
degree <- as.integer(3)
kernel <- FixedDegreeStringKernel(feats_train, feats_train, degree)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.
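# i.e. k(x,y) = exp(-||x-y||^2 / width); as a one-line plain R sketch (assuming
# this parametrization of 'width'):
gaussian_k <- function(x, y, width) exp(-sum((x - y)^2) / width)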
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# gaussian
print('Gaussian')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 1.9
kernel <- GaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# An experimental kernel inspired by the WeightedDegreePositionStringKernel and the Gaussian kernel.
# The idea is to shift the dimensions of the input vectors against each other.
# 'shift_step' is the step size of the shifts and 'max_shift' is the maximal shift.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# gaussian_shift
print('GaussianShift')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
width <- 1.8
max_shift <- as.integer(2)
shift_step <- as.integer(1)
kernel <- GaussianShiftKernel(
feats_train, feats_train, width, max_shift, shift_step)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The HistogramWordString kernel computes the TOP kernel on inhomogeneous Markov chains.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
label_train_dna <- as.real(as.matrix(read.table('../data/label_train_dna.dat')))
# plugin_estimate
print('PluginEstimate w/ HistogramWord')
order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(feats_train, charfeat, start, order, gap, reverse)
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(feats_test, charfeat, start, order, gap, reverse)
pie <- PluginEstimate()
labels <- Labels(label_train_dna)
dump <- pie$set_labels(pie, labels)
dump <- pie$set_features(pie, feats_train)
dump <- pie$train(pie)
kernel <- HistogramWordStringKernel(feats_train, feats_train, pie)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
dump <- pie$set_features(pie, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on raw byte
# data.
library(shogun)
fm_train_byte <- as.matrix(read.table('../data/fm_train_byte'))
fm_test_byte <- as.matrix(read.table('../data/fm_test_byte'))
# linear byte
print('LinearByte')
num_feats <- 11
feats_train <- ByteFeatures(RAWBYTE)
dump <- feats_train$copy_feature_matrix(feats_train, fm_train_byte)
feats_test <- ByteFeatures(RAWBYTE)
dump <- feats_test$copy_feature_matrix(feats_test, fm_test_byte)
kernel <- LinearByteKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on real valued
# data using scaling factor 1.2.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# linear
print('Linear')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
scale <- 1.2
kernel <- LinearKernel()
dump <- kernel$set_normalizer(kernel, AvgDiagKernelNormalizer(scale))
dump <- kernel$init(kernel, feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on string data. The
# strings are all of the same length and consist of the characters 'ACGT' corresponding
# to the DNA-alphabet. Each column of the matrices of type char corresponds to
# one training/test example.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# linear_string
print('LinearString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
kernel <- LinearStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on word (2byte)
# data.
library(shogun)
fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))
## linear_word
#print('LinearWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- LinearWordKernel(feats_train, feats_train, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the local alignment kernel on
# DNA sequences, where each column of the matrices of type char corresponds to
# one training/test example.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# local_alignment_string
print('LocalAlignmentString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
kernel <- LocalAlignmentStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This example initializes the locality improved string kernel. The locality improved string
# kernel is defined on sequences of the same length and inspects letters matching at
# corresponding positions in both sequences. The kernel sums over all matches in windows of
# length l and takes this sum to the power of 'inner_degree'. The sum over all these
# terms along the sequence is taken to the power of 'outer_degree'.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# locality_improved_string
print('LocalityImprovedString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)
kernel <- LocalityImprovedStringKernel(
feats_train, feats_train, l, inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example initializing the oligo string kernel, which takes distances
# between matching oligos (k-mers) into account via a Gaussian. Variable 'k' defines
# the length of the oligos and variable 'width' the width of the Gaussian. The oligo
# string kernel is implemented for the DNA alphabet 'ACGT'.
#
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# oligo_string
print('OligoString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
k <- as.integer(3)
width <- 1.2
size_cache <- as.integer(10)
kernel <- OligoStringKernel(size_cache, k, width)
dump <- kernel$init(kernel, feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This example initializes the polynomial kernel on real data. If the variable
# 'inhomogene' is TRUE, +1 is added to the scalar product before taking it to the
# power of 'degree'.
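# Ignoring any normalization, the kernel value is simply (didactic plain R sketch):
poly_k <- function(x, y, degree, inhomogene) {
  (sum(x * y) + as.numeric(inhomogene))^degree   # +1 is added iff inhomogene is TRUE
}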
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# poly
print('Poly')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
degree <- as.integer(4)
inhomogene <- FALSE
kernel <- PolyKernel(
feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the PolyMatchString kernel on string data.
# The PolyMatchString kernel sums over the matches of two strings of the same length and
# takes the sum to the power of 'degree'. The strings consist of the characters 'ACGT' corresponding
# to the DNA-alphabet. Each column of the matrices of type char corresponds to
# one training/test example.
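# A plain R sketch of this definition for two equal-length strings (didactic,
# ignoring any normalization the library may apply):
poly_match_k <- function(X, Y, degree, inhomogene) {
  m <- sum(strsplit(X, '')[[1]] == strsplit(Y, '')[[1]])  # number of matching positions
  (m + as.numeric(inhomogene))^degree
}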
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# poly_match_string
print('PolyMatchString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
degree <- as.integer(3)
inhomogene <- FALSE
kernel <- PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The PolyMatchWordString kernel is defined on strings of equal length.
# The kernel sums over the matches of two strings of the same length and
# takes the sum to the power of 'degree'. The strings in this example
# consist of the characters 'ACGT' corresponding to the DNA-alphabet. Each
# column of the matrices of type char corresponds to one training/test example.
library(shogun)
fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))
## poly_match_word
#print('PolyMatchWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- as.integer(2)
#inhomogene <- TRUE
#
#kernel <- PolyMatchWordKernel(feats_train, feats_train, degree, inhomogene)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# The standard Sigmoid kernel computed on dense real valued features.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# sigmoid
print('Sigmoid')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
size_cache <- as.integer(10)
gamma <- 1.2
coef0 <- 1.3
kernel <- SigmoidKernel(feats_train, feats_train, size_cache, gamma, coef0)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The SimpleLocalityImprovedString kernel is a 'simplified' and better-performing version of the LocalityImproved kernel.
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# simple_locality_improved_string
print('SimpleLocalityImprovedString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)
kernel <- SimpleLocalityImprovedStringKernel(
feats_train, feats_train, l, inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The well known Gaussian kernel (swiss army knife for SVMs) on sparse real valued features.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# sparse_gaussian
print('SparseGaussian')
feat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, feat)
feat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, feat)
width <- 1.1
kernel <- SparseGaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Computes the standard linear kernel on sparse real valued features.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# sparse_linear
print('SparseLinear')
feat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, feat)
feat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, feat)
scale <- 1.1
kernel <- SparseLinearKernel()
dump <- kernel$set_normalizer(kernel, AvgDiagKernelNormalizer(scale))
dump <- kernel$init(kernel, feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Computes the standard polynomial kernel on sparse real valued features.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# sparse_poly
print('SparsePoly')
feat <- RealFeatures(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feats_train, feat)
feat <- RealFeatures(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feats_test, feat)
size_cache <- as.integer(10)
degree <- as.integer(3)
inhomogene <- TRUE
kernel <- SparsePolyKernel(feats_train, feats_train, size_cache, degree,
inhomogene)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The class TOPFeatures implements TOP kernel features obtained from
# two Hidden Markov models.
#
# It was used in
#
# K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
# discriminative kernel from probabilistic models. Neural Computation,
# 14:2397-2414, 2002.
#
# which also has the details.
#
# Note that TOP-features are computed on the fly, so to be effective feature
# caching should be enabled.
#
# It inherits its functionality from CSimpleFeatures, which should be
# consulted for further reference.
#
library(shogun)
size_cache <- as.integer(0)
fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))
fm_test_cube <- as.matrix(read.table('../data/fm_test_cube.dat', colClasses=c('character')))
# top_fisher
print('TOP/Fisher on PolyKernel')
N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(charfeat, fm_train_cube)
wordfeats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_train$obtain_from_char(wordfeats_train, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, wordfeats_train)
dump <- wordfeats_train$add_preproc(wordfeats_train, preproc)
dump <- wordfeats_train$apply_preproc(wordfeats_train)
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(charfeat, fm_test_cube)
wordfeats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_test$obtain_from_char(wordfeats_test, charfeat, start, order, gap, reverse)
dump <- wordfeats_test$add_preproc(wordfeats_test, preproc)
dump <- wordfeats_test$apply_preproc(wordfeats_test)
pos <- HMM(wordfeats_train, N, M, pseudo)
dump <- pos$train(pos)
dump <- pos$baum_welch_viterbi_train(pos, "BW_NORMAL")
neg <- HMM(wordfeats_train, N, M, pseudo)
dump <- neg$train(neg)
dump <- neg$baum_welch_viterbi_train(neg, "BW_NORMAL")
pos_clone <- HMM(pos)
neg_clone <- HMM(neg)
dump <- pos_clone$set_observations(pos_clone, wordfeats_test)
dump <- neg_clone$set_observations(neg_clone, wordfeats_test)
feats_train <- TOPFeatures(size_cache, pos, neg, FALSE, FALSE)
feats_test <- TOPFeatures(size_cache, pos_clone, neg_clone, FALSE, FALSE)
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
feats_train <- FKFeatures(size_cache, pos, neg)
dump <- feats_train$set_opt_a(feats_train, -1); #estimate prior
feats_test <- FKFeatures(size_cache, pos_clone, neg_clone)
dump <- feats_test$set_a(feats_test, feats_train$get_a()); #use prior from training data
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The WeightedCommWordString kernel may be used to compute the weighted
# spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
# length is weighted by some coefficient beta_k) from strings that have
# been mapped into unsigned 16-bit integers.
#
# These 16-bit integers correspond to k-mers. To be applicable in this kernel
# they need to be sorted (e.g. via the SortWordString preprocessor).
#
# It basically uses the algorithm in the unix "comm" command (hence the name)
# to compute:
#
# k(x, x') = \sum_{k=1}^K \beta_k \Phi_k(x) \cdot \Phi_k(x')
#
# where \Phi_k maps a sequence x that consists of letters in \Sigma to a
# feature vector of size |\Sigma|^k. In this feature vector each entry denotes
# how often the k-mer appears in that x.
#
# Note that this representation is especially tuned to small alphabets
# (like the 2-bit alphabet DNA), for which it enables spectrum kernels
# of order 8.
#
# For this kernel the linadd speedups are quite efficiently implemented using
# direct maps.
#
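# The feature map Phi_k can be mimicked in plain R with table() over all k-mers.
# A didactic sketch of a single (unweighted) order-k term Phi_k(x) . Phi_k(y),
# not the library's sorted-array implementation:
kmers <- function(s, k) {
  sapply(1:(nchar(s) - k + 1), function(i) substr(s, i, i + k - 1))
}
spectrum_term <- function(x, y, k) {
  tx <- table(kmers(x, k))                                # k-mer counts of x
  ty <- table(kmers(y, k))                                # k-mer counts of y
  common <- intersect(names(tx), names(ty))
  sum(as.numeric(tx[common]) * as.numeric(ty[common]))    # dot product of the counts
}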
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# weighted_comm_word_string
print('WeightedCommWordString')
order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- TRUE
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(feats_train, charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(charfeat, fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(feats_test, charfeat, start, order, gap, reverse)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
use_sign <- FALSE
kernel <- WeightedCommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()
kernel <- WeightedCommWordStringKernel(feats_train, feats_test, use_sign)
km_test <- kernel$get_kernel_matrix()
# The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
#
# The WD-shift kernel of order d compares two sequences X and
# Y of length L by summing all contributions of k-mer matches of
# lengths k in 1...d, weighted by coefficients beta_k
# allowing for a positional tolerance of up to shift s.
#
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# weighted_degree_position_string
print('WeightedDegreePositionString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
degree <- as.integer(20)
kernel <- WeightedDegreePositionStringKernel(feats_train, feats_train, degree)
#dump <- kernel$set_shifts(kernel, as.integer(rep(0, nchar(fm_train_dna[1])))) # optional: all-zero shifts reduce this to the plain WD kernel
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
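# The positional tolerance can be sketched in base R: a k-mer match at
# position l in X and l+s in Y (or vice versa) still contributes, downweighted
# with the shift s (illustration only; shogun's exact shift weighting may
# differ):
shifted_matches <- function(x, y, k, s_max) {
	L <- nchar(x)
	ux <- substring(x, 1:(L - k + 1), k:L) # all k-mers of x
	uy <- substring(y, 1:(L - k + 1), k:L) # all k-mers of y
	val <- sum(ux == uy)                   # unshifted matches
	for (s in 1:s_max) {
		n <- length(ux) - s
		val <- val + (sum(ux[1:n] == uy[(1 + s):(n + s)]) +
			sum(ux[(1 + s):(n + s)] == uy[1:n])) / (2 * (s + 1))
	}
	val
}
shifted_matches('ACGTACGT', 'TACGTACG', k=3, s_max=2)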
# The Weighted Degree String kernel.
#
# The WD kernel of order d compares two sequences X and
# Y of length L by summing all contributions of k-mer matches of
# lengths k in 1...d , weighted by coefficients beta_k. It
# is defined as
#
# k(X, Y)=\sum_{k=1}^d\beta_k\sum_{l=1}^{L-k+1}I(u_{k,l}(X)=u_{k,l}(Y)).
#
# Here, $u_{k,l}(X)$ is the string of length k starting at position
# l of the sequence X and I(.) is the indicator function
# which evaluates to 1 when its argument is true and to 0
# otherwise.
#
library(shogun)
fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# weighted_degree_string
print('WeightedDegreeString')
feats_train <- StringCharFeatures("DNA")
dump <- feats_train$set_features(feats_train, fm_train_dna)
feats_test <- StringCharFeatures("DNA")
dump <- feats_test$set_features(feats_test, fm_test_dna)
degree <- as.integer(20)
kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)
#weights <- rev(seq_len(degree)) / sum(seq_len(degree)) # linearly decaying k-mer weights
#dump <- kernel$set_wd_weights(kernel, weights)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
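# The formula above can be checked directly in base R; a minimal sketch
# (illustration only, not shogun code) using the same linearly decaying
# weights as in the commented set_wd_weights line:
wd_kernel <- function(x, y, d, beta=rev(seq_len(d)) / sum(seq_len(d))) {
	L <- nchar(x) # assumes nchar(x) == nchar(y), as the WD kernel does
	val <- 0
	for (k in 1:d) {
		ux <- substring(x, 1:(L - k + 1), k:L) # all k-mers u_{k,l}(X)
		uy <- substring(y, 1:(L - k + 1), k:L) # all k-mers u_{k,l}(Y)
		val <- val + beta[k] * sum(ux == uy)   # indicator sum over positions
	}
	val
}
wd_kernel('ACGTACGTAC', 'ACGTTCGTAC', d=3)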
library(shogun)
# Explicit examples on how to use the different kernels
fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))
## word_match
#print('WordMatch')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- 3
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- WordMatchKernel(feats_train, feats_train, degree, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# In this example we show how to perform Multiple Kernel Learning (MKL)
# with the modular interface for multi-class classification.
# First, we create a number of base kernels and features.
# These kernels can capture different views of the same features, or actually
# consider entirely different features associated with the same example
# (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample).
# The base kernels are then added to a CombinedKernel, which holds a
# weight for each kernel and hides the base kernels from the training
# procedure. When the CombinedKernel is evaluated on a pair of examples,
# it computes the corresponding linear combination of base kernel values
# according to these weights.
# We then show how to create an MKLMultiClass classifier that trains an SVM
# and learns the optimal weighting of kernels (w.r.t. a given norm q) at the
# same time. The main difference from the binary-classification version of
# MKL is that the training labels may take more than two values.
# Finally, the example shows how to classify with a trained MKLMultiClass classifier.
#
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
label_train_multiclass <- as.real(as.matrix(read.table('../data/label_train_multiclass.dat')))
# MKLMultiClass
print('MKLMultiClass')
kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()
subkfeats_train <- RealFeatures(fm_train_real)
subkfeats_test <- RealFeatures(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.2)
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
subkfeats_train <- RealFeatures(fm_train_real)
subkfeats_test <- RealFeatures(fm_test_real)
subkernel <- LinearKernel()
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
subkfeats_train <- RealFeatures(fm_train_real)
subkfeats_test <- RealFeatures(fm_test_real)
subkernel <- PolyKernel(as.integer(10), as.integer(2))
dump <- feats_train$append_feature_obj(feats_train, subkfeats_train)
dump <- feats_test$append_feature_obj(feats_test, subkfeats_test)
dump <- kernel$append_kernel(kernel, subkernel)
dump <- kernel$init(kernel, feats_train, feats_train)
C <- 1.2
epsilon <- 1e-5
mkl_eps <- 0.001
mkl_norm <- 1
num_threads <- as.integer(1)
labels <- Labels(label_train_multiclass)
svm <- MKLMultiClass(C, kernel, labels)
dump <- svm$set_epsilon(svm, epsilon)
dump <- svm$parallel$set_num_threads(svm$parallel, num_threads)
dump <- svm$set_mkl_epsilon(svm, mkl_eps)
#dump <- svm$set_mkl_norm(svm, mkl_norm)
dump <- svm$train(svm)
dump <- kernel$init(kernel, feats_train, feats_test)
lab <- svm$classify(svm)
out <- lab$get_labels(lab)
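# After training one would typically inspect the learned kernel weighting;
# assuming the CombinedKernel method get_subkernel_weights is exposed in the
# R modular interface as it is in the other modular interfaces, this sketch
# would return one weight per base kernel:
#w <- kernel$get_subkernel_weights()
#print(w)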
# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# LogPlusOne adds one to a dense real-valued vector and takes the logarithm of
# each component. It is most useful when the inputs are counts: among small
# counts even a tiny absolute difference may matter a lot, while the same
# difference between large counts hardly matters. The log transformation
# compensates for this.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
#LogPlusOne
print('LogPlusOne')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
preproc <- LogPlusOne()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
width <- 1.4
size_cache <- as.integer(10)
kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
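# The effect of the log(1 + x) transform on counts can be seen directly in
# base R (illustration only): the same absolute difference of 1 is large
# between small counts but negligible between large ones.
log(1 + c(1, 2))     # 0.693 1.099 -> difference ~0.405
log(1 + c(100, 101)) # 4.615 4.625 -> difference ~0.01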
# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# NormOne normalizes each feature vector to unit norm.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
#NormOne
print('NormOne')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
preproc <- NormOne()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
width <- 1.4
size_cache <- as.integer(10)
kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
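# In base-R terms (illustration only) NormOne maps each feature vector x to
# x / sqrt(sum(x^2)). A common form of the chi-squared kernel it is combined
# with here (the exact normalization inside shogun's Chi2Kernel may differ)
# can be sketched as:
norm_one <- function(x) x / sqrt(sum(x^2))
chi2_kernel <- function(x, y, width) {
	s <- x + y
	ok <- s > 0 # skip empty histogram bins to avoid division by zero
	exp(-sum((x[ok] - y[ok])^2 / s[ok]) / width)
}
chi2_kernel(norm_one(c(1, 2, 3)), norm_one(c(1, 3, 2)), width=1.4)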
# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# PruneVarSubMean subtracts the mean from each feature and removes features
# that have zero variance.
library(shogun)
fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
#PruneVarSubMean
print('PruneVarSubMean')
feats_train <- RealFeatures(fm_train_real)
feats_test <- RealFeatures(fm_test_real)
preproc <- PruneVarSubMean()
dump <- preproc$init(preproc, feats_train)
dump <- feats_train$add_preproc(feats_train, preproc)
dump <- feats_train$apply_preproc(feats_train)
dump <- feats_test$add_preproc(feats_test, preproc)
dump <- feats_test$apply_preproc(feats_test)
width <- 1.4
size_cache <- as.integer(10)
kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(kernel, feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
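# In base-R terms (illustration only) PruneVarSubMean does the following on a
# feature matrix with features in rows and examples in columns, the layout
# shogun uses:
X <- rbind(c(1, 2, 3), c(5, 5, 5), c(2, 4, 6)) # second feature has zero variance
keep <- apply(X, 1, var) > 0                   # prune constant features
X_pruned <- X[keep, , drop=FALSE]
sweep(X_pruned, 1, rowMeans(X_pruned))         # subtract each feature's mean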