SHOGUN
v3.0.0
|
This page lists ready-to-run Shogun examples for the R modular interface.
To run the examples issue
R -f name_of_example.R
or start R and then type
source('name_of_example.R')
# In this example a multi-class support vector machine is trained on a toy data
# set and the trained classifier is then used to predict labels of test
# examples. The training algorithm is based on the BSVM formulation (L2-soft
# margin and the bias added to the objective function) which is solved by the
# Improved Mitchell-Demyanov-Malozemov algorithm. A Gaussian kernel of width 2.1
# is used; the solver stops when the relative duality gap falls below 1e-5.
#
# For more details on the used SVM solver see
# V. Franc: Optimization Algorithms for Kernel Methods. Research report.
# CTU-CMP-2005-22. CTU FEL Prague. 2005.
# ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces as.real(), which was removed (defunct) in R 3.0.0
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# gmnpsvm
print('GMNPSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.3
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)
print(label_train_multiclass)

svm <- GMNPSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

# re-initialize the kernel on (train, test) before predicting on the test set
dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is then used to predict labels of
# test examples. As training algorithm the Gradient Projection Decomposition
# Technique (GPDT) is used with a Gaussian kernel of width 2.1 and an
# epsilon-precise (epsilon=1e-5) solution.
#
# For more details on the GPDT solver see http://dm.unife.it/gpdt

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# gpbtsvm
print('GPBTSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 0.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- GPBTSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# This example shows usage of a k-nearest neighbor (KNN) classification rule on
# a toy data set. The number of nearest neighbors is set to k=3 and distances
# are measured by the Euclidean metric. Finally, the KNN rule is applied to
# predict labels of test examples.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# knn
print('KNN')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

distance <- EuclideanDistance()

k <- as.integer(3)
num_threads <- as.integer(1)
labels <- MulticlassLabels()
dump <- labels$set_labels(label_train_multiclass)

knn <- KNN(k, distance, labels)
dump <- knn$parallel$set_num_threads(num_threads)
dump <- knn$train(feats_train)

lab <- knn$apply(feats_test)
out <- lab$get_labels()
# In this example a two-class linear classifier based on Linear Discriminant
# Analysis (LDA) is trained on a toy data set and then used to predict test
# examples. The regularization parameter, which corresponds to the weight of a
# unitary matrix added to the covariance matrix, is set to gamma=3.
#
# For more details on LDA see e.g.
# http://en.wikipedia.org/wiki/Linear_discriminant_analysis

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# lda
print('LDA')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

gamma <- 3
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

lda <- LDA(gamma, feats_train, labels)
dump <- lda$train()

# retrieve parameters of the trained linear rule y = sign(<w, x> + b)
dump <- lda$get_bias()
dump <- lda$get_w()

dump <- lda$set_features(feats_test)
lab <- lda$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and then used to predict labels of test examples.
# As training algorithm the LIBLINEAR solver is used, with the bias in the
# classification rule switched on and precision epsilon=1e-5.
#
# For more details on LIBLINEAR see
# http://www.csie.ntu.edu.tw/~cjlin/liblinear/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# liblinear
print('LibLinear')

# LIBLINEAR operates on sparse features, so dense features are converted first
realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- LibLinear(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(TRUE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# toy data set and used to predict labels of test examples. As training
# algorithm the LIBSVM solver is used with a Gaussian kernel of width 2.1 and
# precision parameter epsilon=1e-5.
#
# For more details on the LIBSVM solver see
# http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# libsvm
print('LibSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(2)
labels <- BinaryLabels()
print(label_train_twoclass)
dump <- labels$set_labels(label_train_twoclass)

svm <- LibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a one-class support vector machine classifier is trained on a
# toy data set. The training algorithm finds a hyperplane in the RKHS which
# separates the training data from the origin; such a classifier is typically
# used to estimate the support of a high-dimensional distribution. See
# B. Schoelkopf et al. Estimating the support of a high-dimensional
# distribution. Neural Computation, 13, 2001, 1443-1471.
#
# The one-class SVM is trained by the LIBSVM solver with a Gaussian kernel of
# width 2.1 and precision parameter epsilon=1e-5.
# For details on LIBSVM see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# libsvm oneclass
print('LibSVMOneClass')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# hyper-parameters of the kernel and the solver
width <- 2.1
C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(4)

kernel <- GaussianKernel(feats_train, feats_train, width)

# note: a one-class SVM needs no labels
oneclass_svm <- LibSVMOneClass(C, kernel)
res <- oneclass_svm$set_epsilon(epsilon)
res <- oneclass_svm$parallel$set_num_threads(num_threads)
res <- oneclass_svm$train()

res <- kernel$init(feats_train, feats_test)
lab <- oneclass_svm$apply()
out <- lab$get_labels()
# In this example a multi-class support vector machine classifier is trained on
# a toy data set and used to predict labels of test examples. Training uses the
# multi-class LIBSVM solver with a Gaussian kernel of width 2.1, regularization
# constant C=1.2 and precision parameter epsilon=1e-5.
# (The original header described the MPD solver, which this example does not
# use.)
#
# For more details on the LIBSVM solver see
# http://www.csie.ntu.edu.tw/~cjlin/libsvm/

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

# libsvmmulticlass
print('LibSVMMulticlass')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.2
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MulticlassLibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a multi-class support vector machine is trained on a toy data
# set with the multi-class LIBSVM solver (Gaussian kernel of width 2.1,
# C=1.017, epsilon=1e-5) and then used to predict labels of test examples.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_multiclass <- as.numeric(read.table('../data/label_train_multiclass.dat')$V1)

print('MulticlassLibSVM')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

width <- 2.1
kernel <- GaussianKernel(feats_train, feats_train, width)

C <- 1.017
epsilon <- 1e-5
num_threads <- as.integer(8)
labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MulticlassLibSVM(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# This example shows usage of the Perceptron algorithm for training a two-class
# linear classifier, i.e. y = sign(<x,w> + b). The Perceptron iteratively
# passes through the training examples and applies the update rule on those
# examples misclassified by the current classifier:
#
#   w(t+1) = w(t) + alpha * y_t * x_t
#   b(t+1) = b(t) + alpha * y_t
#
# where (x_t, y_t) is the feature vector and label (+1/-1) of a misclassified
# example and alpha is the learning rate (here alpha=1). The algorithm iterates
# until all training examples are correctly classified or max_iter=1000
# iterations are reached.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# perceptron
print('Perceptron')

feats_train <- RealFeatures()
dump <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dump <- feats_test$set_feature_matrix(fm_test_real)

learn_rate <- 1.
max_iter <- as.integer(1000)
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

perceptron <- Perceptron(feats_train, labels)
dump <- perceptron$set_learn_rate(learn_rate)
dump <- perceptron$set_max_iter(max_iter)
dump <- perceptron$train()

dump <- perceptron$set_features(feats_test)
lab <- perceptron$apply()
out <- lab$get_labels()
# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and used to predict labels on a test set.
# As training algorithm SVM^light is used with the Weighted Degree kernel of
# degree 20 and precision parameter epsilon=1e-5.
#
# For more details on SVM^light see
# T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
# Methods -- Support Vector Learning, pages 169-184. MIT Press, 1999.
# For more details on the Weighted Degree kernel see
# G. Raetsch, S. Sonnenburg, and B. Schoelkopf. RASE: recognition of
# alternatively spliced exons in C. elegans. Bioinformatics, 21:369-377, 2005.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_dna <- as.numeric(read.table('../data/label_train_dna.dat')$V1)

# svm light
dosvmlight <- function() {
  print('SVMLight')

  # string features take set_features(); the receiver must not be repeated as
  # the first argument of a `$` method call (fixed throughout this function)
  feats_train <- StringCharFeatures("DNA")
  dump <- feats_train$set_features(fm_train_dna)
  feats_test <- StringCharFeatures("DNA")
  dump <- feats_test$set_features(fm_test_dna)
  degree <- as.integer(20)

  kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)

  C <- 1.017
  epsilon <- 1e-5
  num_threads <- as.integer(3)
  # two-class labels, consistent with the other binary examples in this file
  labels <- BinaryLabels()
  dump <- labels$set_labels(label_train_dna)

  svm <- SVMLight(C, kernel, labels)
  dump <- svm$set_epsilon(epsilon)
  dump <- svm$parallel$set_num_threads(num_threads)
  dump <- svm$train()

  dump <- kernel$init(feats_train, feats_test)
  lab <- svm$apply()
  out <- lab$get_labels()
}
# SVMLight is an optional component of shogun, so keep the call guarded
try(dosvmlight())
# In this example a two-class linear support vector machine classifier (SVM) is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the SVMLIN solver is used with the bias in the
# classification rule switched on and precision parameter epsilon=1e-5. The
# example also shows how to retrieve the parameters (vector w and bias b) of
# the trained linear classifier.
#
# For more details on the SVMLIN solver see
# V. Sindhwani, S.S. Keerthi. Newton Methods for Fast Solution of
# Semi-supervised Linear SVMs. Large Scale Kernel Machines, MIT Press, 2007.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# svm lin
print('SVMLin')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMLin(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(TRUE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
dump <- svm$get_bias()
dump <- svm$get_w()
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the OCAS solver is used with the bias term switched off
# and precision parameter epsilon=1e-5 (duality gap).
#
# For more details on the OCAS solver see
# V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale
# Risk Minimization. JMLR, vol. 10, pp. 2157-2192, October 2009.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# svm ocas
print('SVMOcas')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
# fixed: the original chained `dump <- feats_test <- SparseRealFeatures()`,
# leaving the obtain_from_simple() call on the next statement unassigned
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 1.42
epsilon <- 1e-5
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMOcas(C, feats_train, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_bias_enabled(FALSE)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example a two-class linear support vector machine classifier is
# trained on a toy data set and used to predict labels of test examples. As
# training algorithm the Stochastic Gradient Descent (SGD) solver is used.
#
# For more details on the SGD solver see
# L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20.
# MIT Press. 2008.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# as.numeric() replaces the defunct as.real() (removed in R 3.0.0)
label_train_twoclass <- as.numeric(read.table('../data/label_train_twoclass.dat')$V1)

# sgd
print('SVMSGD')

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(realfeat)

realfeat <- RealFeatures()
dump <- realfeat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(realfeat)

C <- 2.3
num_threads <- as.integer(1)
labels <- BinaryLabels()
labels$set_labels(label_train_twoclass)

svm <- SVMSGD(C, feats_train, labels)
#dump <- svm$io$set_loglevel(0)
# NOTE(review): to set the number of epochs, define num_iter before
# uncommenting the next line
#dump <- svm$set_epochs(num_iter)
dump <- svm$train()

dump <- svm$set_features(feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))

# Histogram
print('Histogram')

# parameters for converting char strings into sorted k-mer word strings
order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
res <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
res <- preproc$init(feats)
res <- feats$add_preproc(preproc)
res <- feats$apply_preproc()

histo <- Histogram(feats)
res <- histo$train()
res <- histo$get_histogram()

num_examples <- feats$get_num_vectors()
num_param <- histo$get_num_model_parameters()

# commented out as this is quite time consuming
#derivs <- matrix(0, num_param, num_examples)
#for (i in 0:(num_examples-1)) {
#  for (j in 0:(num_param-1)) {
#    derivs[j,i] <- histo$get_log_derivative(j, i)
#  }
#}

res <- histo$get_log_likelihood()
res <- histo$get_log_likelihood_sample()
# In this example a hidden markov model with 3 states and 6 transitions is
# trained on a string data set. After constructing the HMM with the number of
# states and transitions, the Baum-Welch algorithm estimates the optimal
# transition and emission probabilities. The best path, i.e. the path with
# highest probability given the model, can then be calculated using
# get_best_path_state.

library(shogun)

fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))

# HMM
print('HMM')

N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_train_cube)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats)
dump <- feats$add_preproc(preproc)
dump <- feats$apply_preproc()

hmm <- HMM(feats, N, M, pseudo)
dump <- hmm$train()
dump <- hmm$baum_welch_viterbi_train("BW_NORMAL")

num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()

# shogun uses 0-based indices (j, i); R matrices are 1-based, so store at
# [j+1, i+1] — the original wrote to row/column 0, which is a silent no-op
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1)) {
	for (j in 0:(num_param-1)) {
		derivs[j+1, i+1] <- hmm$get_log_derivative(j, i)
	}
}

best_path <- 0
best_path_state <- 0
for (i in 0:(num_examples-1)) {
	best_path <- best_path + hmm$best_path(i)
	for (j in 0:(N-1)) {
		best_path_state <- best_path_state + hmm$get_best_path_state(i, j)
	}
}

dump <- hmm$get_log_likelihood()
dump <- hmm$get_log_likelihood_sample()
# Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due
# to the structure of the Markov chain it is very similar to a HMM with just
# one chain of connected hidden states - that is why we termed this linear HMM.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))

# Linear HMM
print('LinearHMM')

order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats)
dump <- feats$add_preproc(preproc)
dump <- feats$apply_preproc()

hmm <- LinearHMM(feats)
dump <- hmm$train()
dump <- hmm$get_transition_probs()

num_examples <- feats$get_num_vectors()
num_param <- hmm$get_num_model_parameters()

# shogun uses 0-based indices (j, i); R matrices are 1-based, so store at
# [j+1, i+1] — the original wrote to row/column 0, which is a silent no-op
derivs <- matrix(0, num_param, num_examples)
for (i in 0:(num_examples-1)) {
	for (j in 0:(num_param-1)) {
		derivs[j+1, i+1] <- hmm$get_log_derivative(j, i)
	}
}

#dump <- hmm$get_log_likelihood()
dump <- hmm$get_log_likelihood_sample()
# This example demonstrates the use of the AUC Kernel, which can be used to
# maximize AUC instead of margin in SVMs.

library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# auc — kept disabled; the sketch below is pseudo-code, not runnable R
#print('AUC')
#
#feats_train <- RealFeatures(fm_train_real)
#feats_test <- RealFeatures(fm_test_real)
#width <- 1.7
#subkernel <- GaussianKernel(feats_train, feats_test, width)
#
#num_feats <- 2; # do not change!
#len_train <- 11
#len_test <- 17
#data <- uint16((len_train-1)*rand(num_feats, len_train))
#feats_train <- WordFeatures(data)
#data <- uint16((len_test-1)*rand(num_feats, len_test))
#feats_test <- WordFeatures(data)
#
#kernel <- AUCKernel(feats_train, feats_test, subkernel)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the chi2-kernel on real data,
# where each column of the matrices corresponds to one training/test example.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# chi2
print('Chi2')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# kernel width and cache size (in MB)
width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a combined kernel, which is a
# weighted sum of (here) three kernels on real valued data. The sub-kernel
# weights are all set to 1.

library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
fm_train_dna <- t(as.matrix(read.table('../data/fm_train_dna.dat')))
fm_test_dna <- t(as.matrix(read.table('../data/fm_test_dna.dat')))

# combined
print('Combined')

kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()

# sub-kernel 1: Gaussian kernel on the real-valued features
subkfeats_train <- RealFeatures()
res <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
res <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.6)
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

# sub-kernel 2: fixed-degree string kernel on the DNA strings
subkfeats_train <- StringCharFeatures("DNA")
res <- subkfeats_train$set_features(fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
res <- subkfeats_test$set_features(fm_test_dna)
degree <- as.integer(3)
subkernel <- FixedDegreeStringKernel(as.integer(10), degree)
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

# sub-kernel 3: local-alignment string kernel on the DNA strings
subkfeats_train <- StringCharFeatures("DNA")
res <- subkfeats_train$set_features(fm_train_dna)
subkfeats_test <- StringCharFeatures("DNA")
res <- subkfeats_test$set_features(fm_test_dna)
subkernel <- LocalAlignmentStringKernel(as.integer(10))
res <- feats_train$append_feature_obj(subkfeats_train)
res <- feats_test$append_feature_obj(subkfeats_test)
res <- kernel$append_kernel(subkernel)

res <- kernel$init(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommUlongString-kernel. This
# kernel sums over k-mer matches (k='order'). For efficient computing a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# each k-mer is counted only once.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# comm_ulong_string
print('CommUlongString')

order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- FALSE

# training strings: char -> ulong k-mers, then sort
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats_train <- StringUlongFeatures(charfeat$get_alphabet())
res <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortUlongString()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()

# test strings: same conversion, reusing the trained preprocessor
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_test_dna)
feats_test <- StringUlongFeatures(charfeat$get_alphabet())
res <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

use_sign <- FALSE
kernel <- CommUlongStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of the CommWordString-kernel (aka
# Spectrum or n-gram kernel; its name is derived from the unix command comm).
# This kernel sums over k-mer matches (k='order'). For efficient computing a
# preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
# each k-mer is counted only once.

library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# comm_word_string
print('CommWordString')

order <- as.integer(3)
gap <- as.integer(0)
start <- as.integer(order-1)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(feats_train)
dump <- feats_train$add_preproc(preproc)
dump <- feats_train$apply_preproc()

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
dump <- feats_test$add_preproc(preproc)
dump <- feats_test$apply_preproc()

use_sign <- FALSE

kernel <- CommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

# reuse the same kernel object via init(), consistent with the other examples
# (the original constructed a second kernel for the test matrix)
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The constant kernel gives a trivial kernel matrix with all entries set to the
# same value defined by the argument 'c'.

library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# const
print('Const')

feats_train <- RealFeatures()
ignored <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
ignored <- feats_test$set_feature_matrix(fm_test_real)

# every kernel matrix entry will equal this constant
c <- 23.

kernel <- ConstKernel(feats_train, feats_train, c)
km_train <- kernel$get_kernel_matrix()

ignored <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# A user defined custom kernel is assigned in this example, for which only the
# lower triangle may be given (set_triangle_kernel_matrix_from_triangle), a
# full matrix (set_full_kernel_matrix_from_full), or a full matrix which is
# then internally stored as a triangle
# (set_triangle_kernel_matrix_from_full). Labels for the examples are given, a
# svm is trained and the svm is used to classify the examples.

library(shogun)

# custom — kept disabled; the sketch below is pseudo-code, not runnable R
#print('Custom')
#
#dim <- 7
#data <- rand(dim, dim)
#feats <- RealFeatures(data)
#symdata <- data+data'
#lowertriangle <- array([symdata[(x,y)] for x in xrange(symdata.shape[1])
#	for y in xrange(symdata.shape[0]) if y< <- x])
#
#kernel <- CustomKernel(feats, feats)
#
#kernel$set_triangle_kernel_matrix_from_triangle(lowertriangle)
#km_triangletriangle <- kernel$get_kernel_matrix()
#
#kernel$set_triangle_kernel_matrix_from_full(symdata)
#km_fulltriangle <- kernel$get_kernel_matrix()
#
#kernel$set_full_kernel_matrix_from_full(data)
#km_fullfull <- kernel$get_kernel_matrix()
# DiagKernel example: produces a kernel matrix that is zero everywhere except
# on the main diagonal, whose entries are set to the value 'diag'.
library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

# diag
print('Diag')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
diag <- 23.

kernel <- DiagKernel(feats_train, feats_train, diag)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# DistanceKernel example: builds a kernel on top of a distance measure (here
# EuclideanDistance). Any Distance subclass can be plugged in, e.g.:
# BrayCurtisDistance, CanberraMetric, CanberraWordDistance, ChebyshewMetric,
# ChiSquareDistance, CosineDistance, GeodesicMetric, HammingWordDistance,
# JensenMetric, ManhattanMetric, ManhattanWordDistance, MinkowskiMetric,
# SparseEuclidianDistance, TanimotoDistance, ...
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# distance
print('Distance')

feats_train <- RealFeatures()
dummy <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dummy <- feats_test$set_feature_matrix(fm_test_real)
width <- 1.7
distance <- EuclideanDistance()

# BUG FIX: the kernel used to be constructed on (feats_train, feats_test),
# so km_train was actually a train-x-test matrix. Construct on the training
# features and re-init for the test matrix, matching the sibling examples.
kernel <- DistanceKernel(feats_train, feats_train, width, distance)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# FixedDegreeStringKernel: compares two strings of the same length by counting
# the matching substrings of exactly length 'degree'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# fixed_degree_string
print('FixedDegreeString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(3)

kernel <- FixedDegreeStringKernel(feats_train, feats_train, degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The well known Gaussian (RBF) kernel on dense real-valued features.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# gaussian
print('Gaussian')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
width <- 1.9

kernel <- GaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# GaussianShiftKernel: an experimental kernel inspired by the
# WeightedDegreePositionStringKernel and the Gaussian kernel. The dimensions
# of the input vectors are shifted against each other in steps of
# 'shift_step' up to a maximal shift of 'max_shift'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# gaussian_shift
print('GaussianShift')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

width <- 1.8
max_shift <- as.integer(2)
shift_step <- as.integer(1)

kernel <- GaussianShiftKernel(feats_train, feats_train, width,
                              max_shift, shift_step)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# HistogramWordStringKernel: computes the TOP kernel on inhomogeneous Markov
# chains, using a PluginEstimate trained on two-class DNA data.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))
# FIX: as.real() is defunct since R 4.0.0; as.numeric() is the drop-in
# replacement.
label_train_dna <- as.numeric(as.matrix(read.table('../data/label_train_dna.dat')))

# plugin_estimate
print('PluginEstimate w/ HistogramWord')

order <- as.integer(3)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)

charfeat <- StringCharFeatures("DNA")
dump <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)

# Train the plugin estimator on the labeled training features.
pie <- PluginEstimate()
labels <- BinaryLabels()
dump <- labels$set_labels(label_train_dna)
dump <- pie$set_labels(labels)
dump <- pie$set_features(feats_train)
dump <- pie$train()

kernel <- HistogramWordStringKernel(feats_train, feats_train, pie)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
dump <- pie$set_features(feats_test)
km_test <- kernel$get_kernel_matrix()
# Linear kernel on raw byte features loaded directly from CSV files.
library(shogun)

# linear byte
print('LinearByte')

feats_train <- ByteFeatures(CSVFile('../data/fm_train_byte.dat'))
feats_test <- ByteFeatures(CSVFile('../data/fm_test_byte.dat'))

kernel <- LinearKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

kernel <- LinearKernel(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Linear kernel on real-valued data, rescaled via an AvgDiagKernelNormalizer
# with scaling factor 1.2.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# linear
print('Linear')

feats_train <- RealFeatures()
dummy <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
dummy <- feats_test$set_feature_matrix(fm_test_real)
scale <- 1.2

kernel <- LinearKernel(feats_train, feats_train)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
km_train <- kernel$get_kernel_matrix()

# FIX: apply the same normalizer to the test kernel; previously km_test was
# computed without the scaling applied to km_train, giving inconsistently
# scaled matrices.
kernel <- LinearKernel(feats_train, feats_test)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
km_test <- kernel$get_kernel_matrix()
# Linear kernel on fixed-length DNA strings over the alphabet 'ACGT'; each
# column of the char matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# linear_string
print('LinearString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

kernel <- LinearStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# This is an example for the initialization of a linear kernel on word (2byte)
# data.
library(shogun)

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out -- presumably
# because it is not runnable with this interface version; confirm before
# enabling. Also note the commented call 'kernel$init(kernel, ...)' passes
# the kernel as a spurious first argument; sibling examples use
# 'kernel$init(feats_train, feats_test)'.
## linear_word
#print('LinearWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- LinearWordKernel(feats_train, feats_train, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# LocalAlignmentStringKernel on DNA sequences; each column of the char
# matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# local_alignment_string
print('LocalAlignmentString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

kernel <- LocalAlignmentStringKernel(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# LocalityImprovedStringKernel: defined on sequences of equal length; inspects
# letters matching at corresponding positions. Matches within windows of
# length l are summed and raised to 'inner_degree'; the sum of all window
# terms along the sequence is then raised to 'outer_degree'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# locality_improved_string
print('LocalityImprovedString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)

kernel <- LocalityImprovedStringKernel(feats_train, feats_train, l,
                                       inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# OligoStringKernel: accounts for distances between matching oligos (k-mers)
# via a Gaussian. 'k' is the oligo length and 'width' the Gaussian width; the
# kernel is implemented for the DNA alphabet 'ACGT'.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# oligo_string
print('OligoString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

k <- as.integer(3)
width <- 1.2
size_cache <- as.integer(10)

kernel <- OligoStringKernel(size_cache, k, width)
res <- kernel$init(feats_train, feats_train)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# PolyKernel on real-valued data. When 'inhomogene' is TRUE, +1 is added to
# the scalar product before raising it to the power of 'degree'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# poly
print('Poly')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)
degree <- as.integer(4)
inhomogene <- FALSE

kernel <- PolyKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# PolyMatchStringKernel: sums the matches of two equal-length strings and
# raises the sum to the power of 'degree'. Strings are over the DNA alphabet
# 'ACGT'; each column of the char matrices is one training/test example.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# poly_match_string
print('PolyMatchString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

degree <- as.integer(3)
inhomogene <- FALSE

kernel <- PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The PolyMatchWordString kernel is defined on strings of equal length.
# The kernel sums over the matches of two stings of the same length and
# takes the sum to the power of 'degree'. The strings in this example
# consist of the characters 'ACGT' corresponding to the DNA-alphabet. Each
# column of the matrices of type char corresponds to one training/test example.
library(shogun)

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out. The original
# comments referenced undefined 'traindata_word'/'testdata_word'; they now
# use the 'fm_train_word'/'fm_test_word' matrices loaded above. Confirm
# PolyMatchWordKernel availability before enabling.
## poly_match_word
#print('PolyMatchWord')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- 2
#inhomogene <- TRUE
#
#kernel <- PolyMatchWordKernel(feats_train, feats_train, degree, inhomogene)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# The standard sigmoid kernel on dense real-valued features, parameterized by
# 'gamma_val' and 'coef0'.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sigmoid
print('Sigmoid')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

size_cache <- as.integer(10)
gamma_val <- 1.2   # renamed locally; avoids shadowing base::gamma()
coef0 <- 1.3

kernel <- SigmoidKernel(feats_train, feats_train, size_cache, gamma_val, coef0)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# SimpleLocalityImprovedStringKernel: a `simplified' and better performing
# variant of the locality improved kernel.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# simple_locality_improved_string
print('SimpleLocalityImprovedString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)

l <- as.integer(5)
inner_degree <- as.integer(5)
outer_degree <- as.integer(7)

kernel <- SimpleLocalityImprovedStringKernel(feats_train, feats_train, l,
                                             inner_degree, outer_degree)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Gaussian (RBF) kernel on sparse real-valued features, obtained here from
# dense features via obtain_from_simple().
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_gaussian
print('SparseGaussian')

feat <- RealFeatures()
res <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
res <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
res <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
res <- feats_test$obtain_from_simple(feat)

width <- 1.1

kernel <- GaussianKernel(feats_train, feats_train, width)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Standard linear kernel on sparse real-valued features, rescaled with an
# AvgDiagKernelNormalizer.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_linear
print('SparseLinear')

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feat)

scale <- 1.1

kernel <- LinearKernel(feats_train, feats_train)
dump <- kernel$set_normalizer(AvgDiagKernelNormalizer(scale))
# FIX: compute the train and test kernel matrices as every sibling example
# does; the example previously stopped right after setting the normalizer.
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Standard polynomial kernel on sparse real-valued features.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

# sparse_poly
print('SparsePoly')

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_train_real)
feats_train <- SparseRealFeatures()
dump <- feats_train$obtain_from_simple(feat)

feat <- RealFeatures()
dummy <- feat$set_feature_matrix(fm_test_real)
feats_test <- SparseRealFeatures()
dump <- feats_test$obtain_from_simple(feat)

# Removed the unused 'size_cache' local: PolyKernel is constructed directly
# from the features here and never received it.
degree <- as.integer(3)
inhomogene <- TRUE

kernel <- PolyKernel(feats_train, feats_train, degree, inhomogene)
km_train <- kernel$get_kernel_matrix()

dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# TOP/Fisher kernel features obtained from two Hidden Markov models.
#
# Used in:
# K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
# discriminative kernel from probabilistic models. Neural Computation,
# 14:2397-2414, 2002.
#
# Note that TOP features are computed on the fly, so to be effective feature
# caching should be enabled.
library(shogun)

# FIX: was 'size_cache=as.integer(0)'; use '<-' for assignment per R
# convention.
size_cache <- as.integer(0)

fm_train_cube <- as.matrix(read.table('../data/fm_train_cube.dat', colClasses=c('character')))
fm_test_cube <- as.matrix(read.table('../data/fm_test_cube.dat', colClasses=c('character')))

# top_fisher
print('TOP/Fisher on PolyKernel')

N <- as.integer(3)
M <- as.integer(6)
pseudo <- 1e-1
order <- as.integer(1)
start <- as.integer(order-1)
gap <- as.integer(0)
reverse <- FALSE

# Map train chars to sorted word features.
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_train_cube)
wordfeats_train <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_train$obtain_from_char(charfeat, start, order, gap, reverse)
preproc <- SortWordString()
dump <- preproc$init(wordfeats_train)
dump <- wordfeats_train$add_preproc(preproc)
dump <- wordfeats_train$apply_preproc()

# Same mapping for the test data, reusing the trained preprocessor.
charfeat <- StringCharFeatures("CUBE")
dump <- charfeat$set_features(fm_test_cube)
wordfeats_test <- StringWordFeatures(charfeat$get_alphabet())
dump <- wordfeats_test$obtain_from_char(charfeat, start, order, gap, reverse)
dump <- wordfeats_test$add_preproc(preproc)
dump <- wordfeats_test$apply_preproc()

# Train one HMM per class on the training observations.
pos <- HMM(wordfeats_train, N, M, pseudo)
dump <- pos$train()
dump <- pos$baum_welch_viterbi_train("BW_NORMAL")
neg <- HMM(wordfeats_train, N, M, pseudo)
dump <- neg$train()
dump <- neg$baum_welch_viterbi_train("BW_NORMAL")

# Clones receive the test observations so train/test features stay separate.
pos_clone <- HMM(pos)
neg_clone <- HMM(neg)
dump <- pos_clone$set_observations(wordfeats_test)
dump <- neg_clone$set_observations(wordfeats_test)

# TOP features + linear PolyKernel.
feats_train <- TOPFeatures(size_cache, pos, neg, FALSE, FALSE)
feats_test <- TOPFeatures(size_cache, pos_clone, neg_clone, FALSE, FALSE)
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()

# Fisher kernel (FK) features from the same HMM pair.
feats_train <- FKFeatures(size_cache, pos, neg)
dump <- feats_train$set_opt_a(-1); #estimate prior
feats_test <- FKFeatures(size_cache, pos_clone, neg_clone)
dump <- feats_test$set_a(feats_train$get_a()); #use prior from training data
kernel <- PolyKernel(feats_train, feats_train, as.integer(1), FALSE)
km_train <- kernel$get_kernel_matrix()
dump <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# The WeightedCommWordString kernel may be used to compute the weighted
# spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
# length is weighted by some coefficient \f$\beta_k\f$) from strings that have
# been mapped into unsigned 16bit integers.
#
# These 16bit integers correspond to k-mers. To be applicable in this kernel
# they need to be sorted (e.g. via the SortWordString pre-processor).
#
# It basically uses the algorithm in the unix "comm" command (hence the name)
# to compute:
#
# k({\bf x},{\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
#
# where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
# \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
# vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
#
# Note that this representation is especially tuned to small alphabets
# (like the 2-bit alphabet DNA), for which it enables spectrum kernels
# of order 8.
#
# For this kernel the linadd speedups are quite efficiently implemented using
# direct maps.
#
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_comm_word_string
print('WeightedCommWordString')

order <- as.integer(3)
start <- as.integer(order - 1)
gap <- as.integer(0)
reverse <- TRUE

# Map training chars to sorted word (k-mer) features.
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_train_dna)
feats_train <- StringWordFeatures(charfeat$get_alphabet())
res <- feats_train$obtain_from_char(charfeat, start, order, gap, reverse)

preproc <- SortWordString()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()

# Same mapping for the test data, reusing the trained preprocessor.
charfeat <- StringCharFeatures("DNA")
res <- charfeat$set_features(fm_test_dna)
feats_test <- StringWordFeatures(charfeat$get_alphabet())
res <- feats_test$obtain_from_char(charfeat, start, order, gap, reverse)
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

use_sign <- FALSE

kernel <- WeightedCommWordStringKernel(feats_train, feats_train, use_sign)
km_train <- kernel$get_kernel_matrix()

kernel <- WeightedCommWordStringKernel(feats_train, feats_test, use_sign)
km_test <- kernel$get_kernel_matrix()
# WeightedDegreePositionStringKernel (WD kernel with shifts): compares two
# length-L sequences by summing all contributions of k-mer matches for
# k in 1..d, weighted by coefficients beta_k, while allowing a positional
# tolerance of up to shift s.
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_degree_position_string
print('WeightedDegreePositionString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(20)

kernel <- WeightedDegreePositionStringKernel(feats_train, feats_train, degree)
# unported from the Python example:
#kernel$set_shifts(zeros(len(fm_train_dna[0]), dtype <- int))

km_train <- kernel$get_kernel_matrix()
res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# WeightedDegreeStringKernel (WD kernel): compares two length-L sequences by
# summing all contributions of k-mer matches for k in 1..d, weighted by
# coefficients beta_k:
#
# k(X, Y)=\sum_{k=1}^d\beta_k\sum_{l=1}^{L-k+1}I(u_{k,l}(X)=u_{k,l}(Y)).
#
# Here u_{k,l}(X) is the length-k substring starting at position l of X and
# I(.) is the indicator function (1 when its argument is true, else 0).
library(shogun)

fm_train_dna <- as.matrix(read.table('../data/fm_train_dna.dat'))
fm_test_dna <- as.matrix(read.table('../data/fm_test_dna.dat'))

# weighted_degree_string
print('WeightedDegreeString')

feats_train <- StringCharFeatures("DNA")
res <- feats_train$set_features(fm_train_dna)
feats_test <- StringCharFeatures("DNA")
res <- feats_test$set_features(fm_test_dna)
degree <- as.integer(20)

kernel <- WeightedDegreeStringKernel(feats_train, feats_train, degree)
# unported from the Python example:
#weights <- arange(1,degree+1,dtype <- double)[::-1]/ \
#   sum(arange(1,degree+1,dtype <- double))
#kernel$set_wd_weights(weights)

km_train <- kernel$get_kernel_matrix()
res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
library(shogun)
# Explicit examples on how to use the different kernels

fm_train_word <- as.matrix(read.table('../data/fm_train_word.dat'))
fm_test_word <- as.matrix(read.table('../data/fm_test_word.dat'))

# NOTE(review): the example below is entirely commented out -- presumably
# because WordMatchKernel is not runnable with this interface version;
# confirm before enabling. The commented 'kernel$init(kernel, ...)' call
# also passes the kernel as a spurious first argument; sibling examples use
# 'kernel$init(feats_train, feats_test)'.
## word_match
#print('WordMatch')
#
#feats_train <- WordFeatures(fm_train_word)
#feats_test <- WordFeatures(fm_test_word)
#degree <- 3
#do_rescale <- TRUE
#scale <- 1.4
#
#kernel <- WordMatchKernel(feats_train, feats_train, degree, do_rescale, scale)
#
#km_train <- kernel$get_kernel_matrix()
#kernel$init(kernel, feats_train, feats_test)
#km_test <- kernel$get_kernel_matrix()
# In this example we show how to perform Multiple Kernel Learning (MKL) # with the modular interface for multi-class classification. # First, we create a number of base kernels and features. # These kernels can capture different views of the same features, or actually # consider entirely different features associated with the same example # (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample). # The base kernels are then subsequently added to a CombinedKernel, which # contains a weight for each kernel and encapsulates the base kernels # from the training procedure. When the CombinedKernel between two examples is # evaluated it computes the corresponding linear combination of kernels according to their weights. # We then show how to create an MKLMultiClass classifier that trains an SVM and learns the optimal # weighting of kernels (w.r.t. a given norm q) at the same time. The main difference to the binary # classification version of MKL is that we can use more than two values as labels, when training # the classifier. # Finally, the example shows how to classify with a trained MKLMultiClass classifier. 
#
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))
# FIX: as.real() is defunct since R 4.0.0; as.numeric() is the drop-in
# replacement.
label_train_multiclass <- as.numeric(as.matrix(read.table('../data/label_train_multiclass.dat')))

# MKLMulticlass
print('MKLMulticlass')

kernel <- CombinedKernel()
feats_train <- CombinedFeatures()
feats_test <- CombinedFeatures()

# Base kernel 1: Gaussian.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- GaussianKernel(as.integer(10), 1.2)
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

# Base kernel 2: linear.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- LinearKernel()
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

# Base kernel 3: polynomial of degree 2.
subkfeats_train <- RealFeatures()
dump <- subkfeats_train$set_feature_matrix(fm_train_real)
subkfeats_test <- RealFeatures()
dump <- subkfeats_test$set_feature_matrix(fm_test_real)
subkernel <- PolyKernel(as.integer(10), as.integer(2))
dump <- feats_train$append_feature_obj(subkfeats_train)
dump <- feats_test$append_feature_obj(subkfeats_test)
dump <- kernel$append_kernel(subkernel)

dump <- kernel$init(feats_train, feats_train)

C <- 1.2
epsilon <- 1e-5
mkl_eps <- 0.001
mkl_norm <- 1   # pairs with the set_mkl_norm call, currently commented out
num_threads <- as.integer(1)

labels <- MulticlassLabels()
labels$set_labels(label_train_multiclass)

svm <- MKLMulticlass(C, kernel, labels)
dump <- svm$set_epsilon(epsilon)
dump <- svm$parallel$set_num_threads(num_threads)
dump <- svm$set_mkl_epsilon(mkl_eps)
#dump <- svm$set_mkl_norm(1.5)
dump <- svm$train()

dump <- kernel$init(feats_train, feats_test)
lab <- svm$apply()
out <- lab$get_labels()
# Chi2 kernel on real-valued data preprocessed with LogPlusOne, which adds one
# to each component of a dense real-valued vector and takes the logarithm.
# This is most useful when the inputs are counts: small differences between
# small counts matter a lot, while small differences between large counts
# don't -- the log transformation controls for this.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

#LogPlusOne
print('LogPlusOne')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- LogPlusOne()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Chi2 kernel on real-valued data preprocessed with NormOne, which normalizes
# each vector to unit norm.
library(shogun)

fm_train_real <- t(as.matrix(read.table('../data/fm_train_real.dat')))
fm_test_real <- t(as.matrix(read.table('../data/fm_test_real.dat')))

#NormOne
print('NormOne')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- NormOne()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()
# Chi2 kernel on real-valued data preprocessed with PruneVarSubMean, which
# subtracts the mean from each feature and removes features with zero
# variance.
library(shogun)

fm_train_real <- as.matrix(read.table('../data/fm_train_real.dat'))
fm_test_real <- as.matrix(read.table('../data/fm_test_real.dat'))

#PruneVarSubMean
print('PruneVarSubMean')

feats_train <- RealFeatures()
res <- feats_train$set_feature_matrix(fm_train_real)
feats_test <- RealFeatures()
res <- feats_test$set_feature_matrix(fm_test_real)

# Fit the preprocessor on the training data, then apply it to both sets.
preproc <- PruneVarSubMean()
res <- preproc$init(feats_train)
res <- feats_train$add_preproc(preproc)
res <- feats_train$apply_preproc()
res <- feats_test$add_preproc(preproc)
res <- feats_test$apply_preproc()

width <- 1.4
size_cache <- as.integer(10)

kernel <- Chi2Kernel(feats_train, feats_train, width, size_cache)
km_train <- kernel$get_kernel_matrix()

res <- kernel$init(feats_train, feats_test)
km_test <- kernel$get_kernel_matrix()