SHOGUN
v3.0.0
|
This page lists ready-to-run Shogun examples for the R modular interface.
To run the examples issue
R -f name_of_example.R
or start R and then type
source('name_of_example.R')
# In this example a multi-class support vector machine is trained on a toy data
# set and the trained classifier is then used to predict labels of test
# examples. The training algorithm is based on the BSVM formulation (L2-soft
# margin and the bias added to the objective function) which is solved by the
# Improved Mitchell-Demyanov-Malozemov algorithm. A Gaussian kernel of width 2.1
# is used; the solver stops when the relative duality gap falls below 1e-5.
#
# For more details on the used SVM solver see
# V. Franc: Optimization Algorithms for Kernel Methods. Research report.
# CTU-CMP-2005-22. CTU FEL Prague. 2005.
# ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces as.real(), which was removed (defunct) in R 3.0.0
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# gmnpsvm
print('GMNPSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.3
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)
print(label_train_multiclass)

svm <- GMNPSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

# re-initialize the kernel on (train, test) before predicting on the test set
dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is then used to predict labels of
# test examples. As training algorithm the Gradient Projection Decomposition
# Technique (GPDT) is used with a Gaussian kernel of width 2.1 and an
# epsilon-precise (epsilon=1e-5) solution.
#
# For more details on the GPDT solver see http://dm.unife.it/gpdt

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# gpbtsvm
print('GPBTSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 0.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- GPBTSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# This example shows usage of a k-nearest neighbor (KNN) classification rule on
# a toy data set. The number of nearest neighbors is set to k=3 and distances
# are measured by the Euclidean metric. Finally, the KNN rule is applied to
# predict labels of test examples.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# knn
print('KNN')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

distance <- EuclideanDistance()

k <- as.integer(3)
num_threads <- as.integer(1)
labels <- MulticlassLabels()
dump <- labels$set_labels(label_train_multiclass)

knn <- KNN(k, distance, labels)
dump <- knn$parallel$set_num_threads(num_threads)
dump <- knn$train(feats_train)

lab <- knn$apply(feats_test)
out <- lab$get_labels()
# In this example a two-class linear classifier based on Linear Discriminant
# Analysis (LDA) is trained on a toy data set and then used to predict test
# examples. The regularization parameter, which corresponds to the weight of a
# unitary matrix added to the covariance matrix, is set to gamma=3.
#
# For more details on LDA see e.g.
# http://en.wikipedia.org/wiki/Linear_discriminant_analysis

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# lda
print('LDA')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

gamma <- 3
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

lda <- LDA(gamma, feats_train, labels)
dump <- lda$train()

# retrieve parameters of the trained linear rule y = sign(<w, x> + b)
dump <- lda$get_bias()
dump <- lda$get_w()

dump <- lda$set_features(feats_test)
lab <- lda$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and then used to predict labels of test examples.
# As training algorithm the LIBLINEAR solver is used, with the bias in the
# classification rule switched on and precision epsilon=1e-5.
#
# For more details on LIBLINEAR see
# http://www.csie.ntu.edu.tw/~cjlin/liblinear/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# liblinear
print('LibLinear')

# LIBLINEAR operates on sparse features, so dense features are converted first
realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- LibLinear(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(TRUE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# toy data set and used to predict labels of test examples. As training
# algorithm the LIBSVM solver is used with a Gaussian kernel of width 2.1 and
# precision parameter epsilon=1e-5.
#
# For more details on the LIBSVM solver see
# http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# libsvm
print('LibSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- BinaryLabels()
print(label_train_twoclass)
dump <- labels$set_labels(label_train_twoclass)

svm <- LibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a one-class support vector machine classifier is trained on a
# toy data set. The training algorithm finds a hyperplane in the RKHS which
# separates the training data from the origin; such a classifier is typically
# used to estimate the support of a high-dimensional distribution. See
# B. Schoelkopf et al. Estimating the support of a high-dimensional
# distribution. Neural Computation, 13, 2001, 1443-1471.
#
# The one-class SVM is trained by the LIBSVM solver with a Gaussian kernel of
# width 2.1 and precision parameter epsilon=1e-5.
# For details on LIBSVM see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# libsvm oneclass
print('LibSVMOneClass')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# hyper-parameters of the kernel and the solver
width <- 2.1
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(4)

kernel <- GaussianKernel(feats_train, feats_train, width)

# note: a one-class SVM needs no labels
oneclass_svm <- LibSVMOneClass(C, kernel)
res <- oneclass_svm$set_epsilon(epsilon)
res <- oneclass_svm$parallel$set_num_threads(num_threads)
res <- oneclass_svm$train()

res <- kernel$init(feats_train, feats_test)
lab <- oneclass_svm$apply()
out <- lab$get_labels()
# In this example a multi-class support vector machine classifier is trained on
# a toy data set and used to predict labels of test examples. Training uses the
# multi-class LIBSVM solver with a Gaussian kernel of width 2.1, regularization
# constant C=1.2 and precision parameter epsilon=1e-5.
# (The original header described the MPD solver, which this example does not
# use.)
#
# For more details on the LIBSVM solver see
# http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# libsvmmulticlass
print('LibSVMMulticlass')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.2
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MulticlassLibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a multi-class support vector machine is trained on a toy data
# set with the multi-class LIBSVM solver (Gaussian kernel of width 2.1,
# C=1.017, epsilon=1e-5) and then used to predict labels of test examples.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

print('MulticlassLibSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MulticlassLibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# This example shows usage of the Perceptron algorithm for training a two-class
# linear classifier, i.e. y = sign(<x,w> + b). The Perceptron iteratively
# passes through the training examples and applies the update rule on those
# examples misclassified by the current classifier:
#
#   w(t+1) = w(t) + alpha * y_t * x_t
#   b(t+1) = b(t) + alpha * y_t
#
# where (x_t, y_t) is the feature vector and label (+1/-1) of a misclassified
# example and alpha is the learning rate (here alpha=1). The algorithm iterates
# until all training examples are correctly classified or max_iter=1000
# iterations are reached.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# perceptron
print('Perceptron')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

learn_rate <- 1.
max_iter <- as.integer(1000)
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

perceptron <- Perceptron(feats_train, labels)
dump <- perceptron$set_learn_rate(learn_rate)
dump <- perceptron$set_max_iter(max_iter)
dump <- perceptron$train()

dump <- perceptron$set_features(feats_test)
lab <- perceptron$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and used to predict labels on a test set.
# As training algorithm SVM^light is used with the Weighted Degree kernel of
# degree 20 and precision parameter epsilon=1e-5.
#
# For more details on SVM^light see
# T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
# Methods -- Support Vector Learning, pages 169-184. MIT Press, 1999.
# For more details on the Weighted Degree kernel see
# G. Raetsch, S. Sonnenburg, and B. Schoelkopf. RASE: recognition of
# alternatively spliced exons in C. elegans. Bioinformatics, 21:369-377, 2005.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_dna <- as.numeric(read.table('../data/label_train_dna.dat')$V1)

# svm light
dosvmlight <- function() {
  print('SVMLight')

  # string features take set_features(); the receiver must not be repeated as
  # the first argument of a `$` method call (fixed throughout this function)
  feats_train <- StringCharFeatures("DNA")
  dump <- feats_train$set_features(fm_train_dna)
  feats_test <- StringCharFeatures("DNA")
  dump <- feats_test$set_features(fm_test_dna)
  degree <- as.integer(20)

  kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)

  C <- 1.017
  epsilon <- 1e-5
  num_threads <- as.integer(3)
  # two-class labels, consistent with the other binary examples in this file
  labels <- BinaryLabels()
  dump <- labels$set_labels(label_train_dna)

  svm <- SVMLight(C, kernel, labels)
  dump <- svm$set_epsilon(epsilon)
  dump <- svm$parallel$set_num_threads(num_threads)
  dump <- svm$train()

  dump <- kernel$init(feats_train, feats_test)
  lab <- svm$apply()
  out <- lab$get_labels()
}
# SVMLight is an optional component of shogun, so keep the call guarded
try(dosvmlight())
# In this example a two-class linear support vector machine classifier (SVM) is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the SVMLIN solver is used with the bias in the
# classification rule switched on and precision parameter epsilon=1e-5. The
# example also shows how to retrieve the parameters (vector w and bias b) of
# the trained linear classifier.
#
# For more details on the SVMLIN solver see
# V. Sindhwani, S.S. Keerthi. Newton Methods for Fast Solution of
# Semi-supervised Linear SVMs. Large Scale Kernel Machines, MIT Press, 2007.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# svm lin
print('SVMLin')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMLin(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(TRUE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
dump <- svm$get_bias()
dump <- svm$get_w()
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the OCAS solver is used with the bias term switched off
# and precision parameter epsilon=1e-5 (duality gap).
#
# For more details on the OCAS solver see
# V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale
# Risk Minimization. JMLR, vol. 10, pp. 2157-2192, October 2009.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# svm ocas
print('SVMOcas')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
# fixed: the original chained `dump <- feats_test <- SparseRealFeatures()`,
# leaving the obtain_from_simple() call on the next statement unassigned
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMOcas(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(FALSE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the Stochastic Gradient Descent (SGD) solver is used.
#
# For more details on the SGD solver see
# L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20.
# MIT Press. 2008.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# sgd
print('SVMSGD')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 2.3
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMSGD(C, feats_train, labels)
#dump <- svm$io$set_loglevel(0)
# NOTE(review): to set the number of epochs, define num_iter before
# uncommenting the next line
#dump <- svm$set_epochs(num_iter)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))

# Histogram
print('Histogram')

# parameters for converting char strings into sorted k-mer word strings
order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
res <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
res <- preproc$init(feats)
res <- feats$add_preproc(preproc)
res <- feats$apply_preproc()

histo <- Histogram(feats)
res <- histo$train()
res <- histo$get_histogram()

num_examples <- feats$get_num_vectors()
num_param <- histo$get_num_model_parameters()

# commented out as this is quite time consuming
#derivs <- matrix(0, num_param, num_examples)
#for (i in 0:(num_examples-1)) {
#  for (j in 0:(num_param-1)) {
#    derivs[j,i] <- histo$get_log_derivative(j, i)
#  }
#}

res <- histo$get_log_likelihood()
res <- histo$get_log_likelihood_sample()
# In this example a hidden markov model with 3 states and 6 transitions is
# trained on a string data set. After constructing the HMM with the number of
# states and transitions, the Baum-Welch algorithm estimates the optimal
# transition and emission probabilities. The best path, i.e. the path with
# highest probability given the model, can then be calculated using
# get_best_path_state.

library(shogun)

fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))

# HMM
print('HMM')

N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_train_cube)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats)
dump <- feats$add_preproc(preproc)
dump <- feats$apply_preproc()

hmm <- HMM(feats, N, M, pseudo)
dump <- hmm$train()
dump <- hmm$baum_welch_viterbi_train("BW_NORMAL")

num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()

# shogun uses 0-based indices (j, i); R matrices are 1-based, so store at
# [j+1, i+1] — the original wrote to row/column 0, which is a silent no-op
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1)) {
	for (j in 0:(num_param-1)) {
		derivs[j+1, i+1] <- hmm$get_log_derivative(j, i)
	}
}

best_path <- 0
best_path_state <- 0
for (i in 0:(num_examples-1)) {
	best_path <- best_path + hmm$best_path(i)
	for (j in 0:(N-1)) {
		best_path_state <- best_path_state + hmm$get_best_path_state(i, j)
	}
}

dump <- hmm$get_log_likelihood()
dump <- hmm$get_log_likelihood_sample()
# Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due
# to the structure of the Markov chain it is very similar to a HMM with just
# one chain of connected hidden states - that is why we termed this linear HMM.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))

# Linear HMM
print('LinearHMM')

order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats)
dump <- feats$add_preproc(preproc)
dump <- feats$apply_preproc()

hmm <- LinearHMM(feats)
dump <- hmm$train()
dump <- hmm$get_transition_probs()

num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()

# shogun uses 0-based indices (j, i); R matrices are 1-based, so store at
# [j+1, i+1] — the original wrote to row/column 0, which is a silent no-op
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1)) {
	for (j in 0:(num_param-1)) {
		derivs[j+1, i+1] <- hmm$get_log_derivative(j, i)
	}
}

#dump <- hmm$get_log_likelihood()
dump <- hmm$get_log_likelihood_sample()
# This example demonstrates the use of the AUC Kernel, which can be used to
# maximize AUC instead of margin in SVMs.

library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# auc — kept disabled; the sketch below is pseudo-code, not runnable R
#print('AUC')
#
#feats_train <- RealFeatures(fm_train_real)
#feats_test <- RealFeatures(fm_test_real)
#width <- 1.7
#subkernel <- GaussianKernel(feats_train, feats_test, width)
#
#num_feats <- 2; # do not change!
#len_train <- 11
#len_test <- 17
#data <- uint16((len_train-1)*rand(num_feats, len_train))
#feats_train <- WordFeatures(data)
#data <- uint16((len_test-1)*rand(num_feats, len_test))
#feats_test <- WordFeatures(data)
#
#kernel <- AUCKernel(feats_train, feats_test, subkernel)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the chi2-kernel on real data,
# where each column of the matrices corresponds to one training/test example.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# chi2
print('Chi2')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# kernel width and cache size (in MB)
width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a combined kernel, which is a
# weighted sum of (here) three kernels on real valued data. The sub-kernel
# weights are all set to 1.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
fm_train_dna <- t(as.matrix(read.table('../data/fm_train_dna.dat')))
fm_test_dna <- t(as.matrix(read.table('../data/fm_test_dna.dat')))

# combined
print('Combined')

kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()

# sub-kernel 1: Gaussian kernel on the real-valued features
subkfeats_train <- RealFeatures()
res <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
res <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.6)
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

# sub-kernel 2: fixed-degree string kernel on the DNA strings
subkfeats_train <- StringCharFeatures("DNA")
res <- subkfeats_train$set_features(fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
res <- subkfeats_test$set_features(fm_test_dna)
degree <- as.integer(3)
subkernel <- FixedDegreeStringKernel(as.integer(10), degree)
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

# sub-kernel 3: local-alignment string kernel on the DNA strings
subkfeats_train <- StringCharFeatures("DNA")
res <- subkfeats_train$set_features(fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
res <- subkfeats_test$set_features(fm_test_dna)
subkernel <- LocalAlignmentStringKernel(as.integer(10))
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

res <- kernel$init(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommUlongString-kernel. This
# kernel sums over k-mer matches (k='order'). For efficient computing a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# each k-mer is counted only once.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# comm_ulong_string
print('CommUlongString')

order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- FALSE

# training strings: char -> ulong k-mers, then sort
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats_train <- StringUlongFeatures(charfeat$get_alphabet())
res <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortUlongString()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()

# test strings: same conversion, reusing the trained preprocessor
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_test_dna)
feats_test <- StringUlongFeatures(charfeat$get_alphabet())
res <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

use_sign <- FALSE
kernel <- CommUlongStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommWordString-kernel (aka
# Spectrum or n-gram kernel; its name is derived from the unix command comm).
# This kernel sums over k-mer matches (k='order'). For efficient computing a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# each k-mer is counted only once.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# comm_word_string
print('CommWordString')

order <- as.integer(3)
gap <- as.integer(0)
start <- as.integer(order-1)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats_train)
dump <- feats_train$add_preproc(preproc)
dump <- feats_train$apply_preproc()

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
dump <- feats_test$add_preproc(preproc)
dump <- feats_test$apply_preproc()

use_sign <- FALSE

kernel <- CommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

# reuse the same kernel object via init(), consistent with the other examples
# (the original constructed a second kernel for the test matrix)
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The constant kernel gives a trivial kernel matrix with all entries set to the
# same value defined by the argument 'c'.

library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# const
print('Const')

feats_train <- RealFeatures()
ignored <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
ignored <- feats_test$set_feature_matrix(fm_test_real)

# every kernel matrix entry will equal this constant
c <- 23.

kernel <- ConstKernel(feats_train, feats_train, c)
km_train <- kernel$get_kernel_matrix()

ignored <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# A user defined custom kernel is assigned in this example, for which only the
# lower triangle may be given (set_triangle_kernel_matrix_from_triangle), a
# full matrix (set_full_kernel_matrix_from_full), or a full matrix which is
# then internally stored as a triangle
# (set_triangle_kernel_matrix_from_full). Labels for the examples are given, a
# svm is trained and the svm is used to classify the examples.

library(shogun)

# custom — kept disabled; the sketch below is pseudo-code, not runnable R
#print('Custom')
#
#dim <- 7
#data <- rand(dim, dim)
#feats <- RealFeatures(data)
#symdata <- data+data'
#lowertriangle <- array([symdata[(x,y)] for x in xrange(symdata.shape[1])
#	for y in xrange(symdata.shape[0]) if y< <- x])
#
#kernel <- CustomKernel(feats, feats)
#
#kernel$set_triangle_kernel_matrix_from_triangle(lowertriangle)
#km_triangletriangle <- kernel$get_kernel_matrix()
#
#kernel$set_triangle_kernel_matrix_from_full(symdata)
#km_fulltriangle <- kernel$get_kernel_matrix()
#
#kernel$set_full_kernel_matrix_from_full(data)
#km_fullfull <- kernel$get_kernel_matrix()
# DiagKernel example: produces a kernel matrix that is zero everywhere except
# on the main diagonal, whose entries are set to the value 'diag'.
library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# diag
print('Diag')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
diag <- 23.

kernel <- DiagKernel(feats_train, feats_train, diag)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# DistanceKernel example: builds a kernel on top of a distance measure (here
# EuclideanDistance). Any Distance subclass can be plugged in, e.g.:
# BrayCurtisDistance, CanberraMetric, CanberraWordDistance, ChebyshewMetric,
# ChiSquareDistance, CosineDistance, GeodesicMetric, HammingWordDistance,
# JensenMetric, ManhattanMetric, ManhattanWordDistance, MinkowskiMetric,
# SparseEuclidianDistance, TanimotoDistance, ...
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# distance
print('Distance')

feats_train <- RealFeatures()
dummy <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dummy <- feats_test$set_feature_matrix(fm_test_real)
width <- 1.7
distance <- EuclideanDistance()

# BUG FIX: the kernel used to be constructed on (feats_train, feats_test),
# so km_train was actually a train-x-test matrix. Construct on the training
# features and re-init for the test matrix, matching the sibling examples.
kernel <- DistanceKernel(feats_train, feats_train, width, distance)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# FixedDegreeStringKernel: compares two strings of the same length by counting
# the matching substrings of exactly length 'degree'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# fixed_degree_string
print('FixedDegreeString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(3)

kernel <- FixedDegreeStringKernel(feats_train, feats_train, degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The well known Gaussian (RBF) kernel on dense real-valued features.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# gaussian
print('Gaussian')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
width <- 1.9

kernel <- GaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# GaussianShiftKernel: an experimental kernel inspired by the
# WeightedDegreePositionStringKernel and the Gaussian kernel. The dimensions
# of the input vectors are shifted against each other in steps of
# 'shift_step' up to a maximal shift of 'max_shift'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# gaussian_shift
print('GaussianShift')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

width <- 1.8
max_shift <- as.integer(2)
shift_step <- as.integer(1)

kernel <- GaussianShiftKernel(feats_train, feats_train, width,
                              max_shift, shift_step)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# HistogramWordStringKernel: computes the TOP kernel on inhomogeneous Markov
# chains, using a PluginEstimate trained on two-class DNA data.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# FIX: as.real() is defunct since R 4.0.0; as.numeric() is the drop-in
# replacement.
label_train_dna <- as.numeric(as.matrix(read.table('../data/label_train_dna.dat')))

# plugin_estimate
print('PluginEstimate w/ HistogramWord')

order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)

# Train the plugin estimator on the labeled training features.
pie <- PluginEstimate()
labels <- BinaryLabels()
dump <- labels$set_labels(label_train_dna)
dump <- pie$set_labels(labels)
dump <- pie$set_features(feats_train)
dump <- pie$train()

kernel <- HistogramWordStringKernel(feats_train, feats_train, pie)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
dump <- pie$set_features(feats_test)
km_test <- kernel$get_kernel_matrix()
# Linear kernel on raw byte features loaded directly from CSV files.
library(shogun)

# linear byte
print('LinearByte')

feats_train <- ByteFeatures(CSVFile('../data/fm_train_byte.dat'))
feats_test <- ByteFeatures(CSVFile('../data/fm_test_byte.dat'))

kernel <- LinearKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

kernel <- LinearKernel(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Linear kernel on real-valued data, rescaled via an AvgDiagKernelNormalizer
# with scaling factor 1.2.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# linear
print('Linear')

feats_train <- RealFeatures()
dummy <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dummy <- feats_test$set_feature_matrix(fm_test_real)
scale <- 1.2

kernel <- LinearKernel(feats_train, feats_train)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
km_train <- kernel$get_kernel_matrix()

# FIX: apply the same normalizer to the test kernel; previously km_test was
# computed without the scaling applied to km_train, giving inconsistently
# scaled matrices.
kernel <- LinearKernel(feats_train, feats_test)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
km_test <- kernel$get_kernel_matrix()
# Linear kernel on fixed-length DNA strings over the alphabet 'ACGT'; each
# column of the char matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# linear_string
print('LinearString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

kernel <- LinearStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on word (2byte)
# data.
library(shogun)

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out -- presumably
# because it is not runnable with this interface version; confirm before
# enabling. Also note the commented call 'kernel$init(kernel, ...)' passes
# the kernel as a spurious first argument; sibling examples use
# 'kernel$init(feats_train, feats_test)'.
## linear_word
#print('LinearWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- LinearWordKernel(feats_train, feats_train, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# LocalAlignmentStringKernel on DNA sequences; each column of the char
# matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# local_alignment_string
print('LocalAlignmentString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

kernel <- LocalAlignmentStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# LocalityImprovedStringKernel: defined on sequences of equal length; inspects
# letters matching at corresponding positions. Matches within windows of
# length l are summed and raised to 'inner_degree'; the sum of all window
# terms along the sequence is then raised to 'outer_degree'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# locality_improved_string
print('LocalityImprovedString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)

kernel <- LocalityImprovedStringKernel(feats_train, feats_train, l,
                                       inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# OligoStringKernel: accounts for distances between matching oligos (k-mers)
# via a Gaussian. 'k' is the oligo length and 'width' the Gaussian width; the
# kernel is implemented for the DNA alphabet 'ACGT'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# oligo_string
print('OligoString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

k <- as.integer(3)
width <- 1.2
size_cache <- as.integer(10)

kernel <- OligoStringKernel(size_cache, k, width)
res <- kernel$init(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# PolyKernel on real-valued data. When 'inhomogene' is TRUE, +1 is added to
# the scalar product before raising it to the power of 'degree'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# poly
print('Poly')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
degree <- as.integer(4)
inhomogene <- FALSE

kernel <- PolyKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# PolyMatchStringKernel: sums the matches of two equal-length strings and
# raises the sum to the power of 'degree'. Strings are over the DNA alphabet
# 'ACGT'; each column of the char matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# poly_match_string
print('PolyMatchString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

degree <- as.integer(3)
inhomogene <- FALSE

kernel <- PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The PolyMatchWordString kernel is defined on strings of equal length.
# The kernel sums over the matches of two stings of the same length and
# takes the sum to the power of 'degree'. The strings in this example
# consist of the characters 'ACGT' corresponding to the DNA-alphabet. Each
# column of the matrices of type char corresponds to one training/test example.
library(shogun)

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out. The original
# comments referenced undefined 'traindata_word'/'testdata_word'; they now
# use the 'fm_train_word'/'fm_test_word' matrices loaded above. Confirm
# PolyMatchWordKernel availability before enabling.
## poly_match_word
#print('PolyMatchWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- 2
#inhomogene <- TRUE
#
#kernel <- PolyMatchWordKernel(feats_train, feats_train, degree, inhomogene)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# The standard sigmoid kernel on dense real-valued features, parameterized by
# 'gamma_val' and 'coef0'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sigmoid
print('Sigmoid')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

size_cache <- as.integer(10)
gamma_val <- 1.2   # renamed locally; avoids shadowing base::gamma()
coef0 <- 1.3

kernel <- SigmoidKernel(feats_train, feats_train, size_cache, gamma_val, coef0)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# SimpleLocalityImprovedStringKernel: a `simplified' and better performing
# variant of the locality improved kernel.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# simple_locality_improved_string
print('SimpleLocalityImprovedString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)

kernel <- SimpleLocalityImprovedStringKernel(feats_train, feats_train, l,
                                             inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Gaussian (RBF) kernel on sparse real-valued features, obtained here from
# dense features via obtain_from_simple().
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_gaussian
print('SparseGaussian')

feat <- RealFeatures()
res <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
res <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
res <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
res <- feats_test$obtain_from_simple(feat)

width <- 1.1

kernel <- GaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Standard linear kernel on sparse real-valued features, rescaled with an
# AvgDiagKernelNormalizer.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_linear
print('SparseLinear')

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feat)

scale <- 1.1

kernel <- LinearKernel(feats_train, feats_train)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
# FIX: compute the train and test kernel matrices as every sibling example
# does; the example previously stopped right after setting the normalizer.
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Standard polynomial kernel on sparse real-valued features.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_poly
print('SparsePoly')

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feat)

# Removed the unused 'size_cache' local: PolyKernel is constructed directly
# from the features here and never received it.
degree <- as.integer(3)
inhomogene <- TRUE

kernel <- PolyKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# TOP/Fisher kernel features obtained from two Hidden Markov models.
#
# Used in:
# K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
# discriminative kernel from probabilistic models. Neural Computation,
# 14:2397-2414, 2002.
#
# Note that TOP features are computed on the fly, so to be effective feature
# caching should be enabled.
library(shogun)

# FIX: was 'size_cache=as.integer(0)'; use '<-' for assignment per R
# convention.
size_cache <- as.integer(0)

fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))
fm_test_cube <- as.matrix(read.table('../data/fm_test_cube.dat', colClasses=c('character')))

# top_fisher
print('TOP/Fisher on PolyKernel')

N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

# Map train chars to sorted word features.
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_train_cube)
wordfeats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(wordfeats_train)
dump <- wordfeats_train$add_preproc(preproc)
dump <- wordfeats_train$apply_preproc()

# Same mapping for the test data, reusing the trained preprocessor.
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_test_cube)
wordfeats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_test$obtain_from_char(charfeat, start, order, gap, reverse)
dump <- wordfeats_test$add_preproc(preproc)
dump <- wordfeats_test$apply_preproc()

# Train one HMM per class on the training observations.
pos <- HMM(wordfeats_train, N, M, pseudo)
dump <- pos$train()
dump <- pos$baum_welch_viterbi_train("BW_NORMAL")
neg <- HMM(wordfeats_train, N, M, pseudo)
dump <- neg$train()
dump <- neg$baum_welch_viterbi_train("BW_NORMAL")

# Clones receive the test observations so train/test features stay separate.
pos_clone <- HMM(pos)
neg_clone <- HMM(neg)
dump <- pos_clone$set_observations(wordfeats_test)
dump <- neg_clone$set_observations(wordfeats_test)

# TOP features + linear PolyKernel.
feats_train <- TOPFeatures(size_cache, pos, neg, FALSE, FALSE)
feats_test <- TOPFeatures(size_cache, pos_clone, neg_clone, FALSE, FALSE)
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()

# Fisher kernel (FK) features from the same HMM pair.
feats_train <- FKFeatures(size_cache, pos, neg)
dump <- feats_train$set_opt_a(-1); #estimate prior
feats_test <- FKFeatures(size_cache, pos_clone, neg_clone)
dump <- feats_test$set_a(feats_train$get_a()); #use prior from training data
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The WeightedCommWordString kernel may be used to compute the weighted
# spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
# length is weighted by some coefficient \f$\beta_k\f$) from strings that have
# been mapped into unsigned 16bit integers.
#
# These 16bit integers correspond to k-mers. To be applicable in this kernel
# they need to be sorted (e.g. via the SortWordString pre-processor).
#
# It basically uses the algorithm in the unix "comm" command (hence the name)
# to compute:
#
# k({\bf x},{\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
#
# where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
# \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
# vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
#
# Note that this representation is especially tuned to small alphabets
# (like the 2-bit alphabet DNA), for which it enables spectrum kernels
# of order 8.
#
# For this kernel the linadd speedups are quite efficiently implemented using
# direct maps.
#
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_comm_word_string
print('WeightedCommWordString')

order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- TRUE

# Map training chars to sorted word (k-mer) features.
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
res <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)

preproc <- SortWordString()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()

# Same mapping for the test data, reusing the trained preprocessor.
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
res <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

use_sign <- FALSE

kernel <- WeightedCommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

kernel <- WeightedCommWordStringKernel(feats_train, feats_test, use_sign)
km_test <- kernel$get_kernel_matrix()
# WeightedDegreePositionStringKernel (WD kernel with shifts): compares two
# length-L sequences by summing all contributions of k-mer matches for
# k in 1..d, weighted by coefficients beta_k, while allowing a positional
# tolerance of up to shift s.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_degree_position_string
print('WeightedDegreePositionString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(20)

kernel <- WeightedDegreePositionStringKernel(feats_train, feats_train, degree)
# unported from the Python example:
#kernel$set_shifts(zeros(len(fm_train_dna[0]), dtype <- int))

km_train <- kernel$get_kernel_matrix()
res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# WeightedDegreeStringKernel (WD kernel): compares two length-L sequences by
# summing all contributions of k-mer matches for k in 1..d, weighted by
# coefficients beta_k:
#
# k(X, Y)=\sum_{k=1}^d\beta_k\sum_{l=1}^{L-k+1}I(u_{k,l}(X)=u_{k,l}(Y)).
#
# Here u_{k,l}(X) is the length-k substring starting at position l of X and
# I(.) is the indicator function (1 when its argument is true, else 0).
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_degree_string
print('WeightedDegreeString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(20)

kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)
# unported from the Python example:
#weights <- arange(1,degree+1,dtype <- double)[::-1]/ \
#   sum(arange(1,degree+1,dtype <- double))
#kernel$set_wd_weights(weights)

km_train <- kernel$get_kernel_matrix()
res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
library(shogun)
# Explicit examples on how to use the different kernels

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out -- presumably
# because WordMatchKernel is not runnable with this interface version;
# confirm before enabling. The commented 'kernel$init(kernel, ...)' call
# also passes the kernel as a spurious first argument; sibling examples use
# 'kernel$init(feats_train, feats_test)'.
## word_match
#print('WordMatch')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- 3
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- WordMatchKernel(feats_train, feats_train, degree, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# In this example we show how to perform Multiple Kernel Learning (MKL) # with the modular interface for multi-class classification. # First, we create a number of base kernels and features. # These kernels can capture different views of the same features, or actually # consider entirely different features associated with the same example # (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample). # The base kernels are then subsequently added to a CombinedKernel, which # contains a weight for each kernel and encapsulates the base kernels # from the training procedure. When the CombinedKernel between two examples is # evaluated it computes the corresponding linear combination of kernels according to their weights. # We then show how to create an MKLMultiClass classifier that trains an SVM and learns the optimal # weighting of kernels (w.r.t. a given norm q) at the same time. The main difference to the binary # classification version of MKL is that we can use more than two values as labels, when training # the classifier. # Finally, the example shows how to classify with a trained MKLMultiClass classifier. 
#
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# FIX: as.real() is defunct since R 4.0.0; as.numeric() is the drop-in
# replacement.
label_train_multiclass <- as.numeric(as.matrix(read.table('../data/label_train_multiclass.dat')))

# MKLMulticlass
print('MKLMulticlass')

kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()

# Base kernel 1: Gaussian.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.2)
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

# Base kernel 2: linear.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- LinearKernel()
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

# Base kernel 3: polynomial of degree 2.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- PolyKernel(as.integer(10), as.integer(2))
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

dump <- kernel$init(feats_train, feats_train)

C <- 1.2
epsilon <- 1e-5
mkl_eps <- 0.001
mkl_norm <- 1   # pairs with the set_mkl_norm call, currently commented out
num_threads <- as.integer(1)

labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MKLMulticlass(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_mkl_epsilon(mkl_eps)
#dump <- svm$set_mkl_norm(1.5)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# Chi2 kernel on real-valued data preprocessed with LogPlusOne, which adds one
# to each component of a dense real-valued vector and takes the logarithm.
# This is most useful when the inputs are counts: small differences between
# small counts matter a lot, while small differences between large counts
# don't -- the log transformation controls for this.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

#LogPlusOne
print('LogPlusOne')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- LogPlusOne()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Chi2 kernel on real-valued data preprocessed with NormOne, which normalizes
# each vector to unit norm.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

#NormOne
print('NormOne')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- NormOne()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Chi2 kernel on real-valued data preprocessed with PruneVarSubMean, which
# subtracts the mean from each feature and removes features with zero
# variance.
library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

#PruneVarSubMean
print('PruneVarSubMean')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- PruneVarSubMean()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()