|
SHOGUN
v2.0.0
|
This page lists ready to run shogun examples for the Java Modular interface.
To run the examples, compile and then execute them with modshogun.jar on the classpath:
javac -cp path/to/modshogun.jar name_of_example.java
java -cp path/to/modshogun.jar:. name_of_example
// In this example the Averaged Perceptron is used to classify toy data.
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.io.Serializable;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains an {@code AveragedPerceptron} on the two-class toy data and applies
 * the trained machine to the test features.
 */
public class classifier_averaged_perceptron_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example from start to finish; the predicted labels are computed
     * but not printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double learningRate = 10;
        final int maxIterations = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);
        final AveragedPerceptron classifier = new AveragedPerceptron(trainFeatures, trainLabels);
        classifier.set_learn_rate(learningRate);
        classifier.set_max_iter(maxIterations);
        classifier.train();

        // Swap in the test features so that apply() predicts on them.
        classifier.set_features(testFeatures);
        final BinaryLabels predicted = obtain_from_generic(classifier.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();

        modshogun.exit_shogun();
    }
}
// In this example we demonstrate how to use SVMs in a domain adaptation
// scenario. Here, we assume that we have two problem domains, one with
// an abundance of training data (source domain) and one with only a few
// training examples (target domain). These domains are assumed to be
// different but related enough to transfer information between them.
// Thus, we first train an SVM on the source domain and then subsequently
// pass this previously trained SVM object to the DASVM, that we train
// on the target domain. The DASVM internally computes a custom linear term
// (for the underlying quadratic program of the dual formulation of the SVM)
// based on the support vectors of the source SVM and the training examples
// of the target SVM. Finally, it can be used for prediction just as any other
// SVM object.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Domain adaptation example: an {@code SVMLight} is trained on DNA strings with
 * a weighted degree string kernel, the trained machine is handed to a
 * {@code DomainAdaptationSVM}, which is trained in turn and applied to the
 * test strings.
 */
public class classifier_domainadaptationsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the predicted labels are computed but not printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int kernelDegree = 3;
        final int svmC = 1;

        final String[] trainSequences = {
            "CGCACGTACGTAGCTCGAT",
            "CGACGTAGTCGTAGTCGTA",
            "CGACGGGGGGGGGGTCGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGACCACAGTTATATAGTA",
            "CGACGTAGTCGTAGTCGTA",
            "CGACGTAGTTTTTTTCGTA",
            "CGACGTAGTCGTAGCCCCA",
            "CAAAAAAAAAAAAAAAATA",
            "CGACGGGGGGGGGGGCGTA"
        };
        final String[] testSequences = {
            "AGCACGTACGTAGCTCGAT",
            "AGACGTAGTCGTAGTCGTA",
            "CAACGGGGGGGGGGTCGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGAACACAGTTATATAGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGACGTGGGGTTTTTCGTA",
            "CGACGTAGTCCCAGCCCCA",
            "CAAAAAAAAAAAACCAATA",
            "CGACGGCCGGGGGGGCGTA"
        };

        final StringCharFeatures trainFeatures = new StringCharFeatures(trainSequences, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testSequences, DNA);
        final WeightedDegreeStringKernel wdKernel =
            new WeightedDegreeStringKernel(trainFeatures, trainFeatures, kernelDegree);

        // One label per training string: five negatives followed by five positives.
        final double[][] trainLabelRow = {{-1,-1,-1,-1,-1,1,1,1,1,1}};
        final BinaryLabels trainLabels = new BinaryLabels(new DoubleMatrix(trainLabelRow));

        // The same kernel and labels are used for the pre-trained SVM and for the DASVM.
        final SVMLight sourceSvm = new SVMLight(svmC, wdKernel, trainLabels);
        sourceSvm.train();

        // NOTE(review): the meaning of the trailing 1.0 is not visible here -- confirm
        // against the DomainAdaptationSVM constructor documentation.
        final DomainAdaptationSVM adaptedSvm =
            new DomainAdaptationSVM(svmC, wdKernel, trainLabels, sourceSvm, 1.0);
        adaptedSvm.train();

        final BinaryLabels predicted = obtain_from_generic(adaptedSvm.apply(testFeatures));
        final DoubleMatrix predictedValues = predicted.get_labels();

        modshogun.exit_shogun();
    }
}
// In this example the Gaussian Naive Bayes algorithm is used to classify
// toy data.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
/**
 * Trains a {@code GaussianNaiveBayes} machine on the multiclass toy data and
 * prints the labels predicted for the test features.
 */
public class classifier_gaussiannaivebayes_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_multiclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final MulticlassLabels trainLabels = new MulticlassLabels(trainLabelValues);
        final GaussianNaiveBayes classifier = new GaussianNaiveBayes(trainFeatures, trainLabels);
        classifier.train();

        final MulticlassLabels predicted = obtain_from_generic(classifier.apply(testFeatures));
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a multi-class support vector machine is trained on a toy data
// set and the trained classifier is then used to predict labels of test
// examples. The training algorithm is based on BSVM formulation (L2-soft margin
// and the bias added to the objective function) which is solved by the Improved
// Mitchell-Demyanov-Malozemov algorithm. The training algorithm uses the Gaussian
// kernel of width 2.1 and the regularization constant C=1. The solver stops if the
// relative duality gap falls below 1e-5.
//
// For more details on the used SVM solver see
// V.Franc: Optimization Algorithms for Kernel Methods. Research report.
// CTU-CMP-2005-22. CTU FEL Prague. 2005.
// ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
/**
 * Trains a {@code GMNPSVM} with a Gaussian kernel on the multiclass toy data
 * and prints the labels predicted for the test features.
 */
public class classifier_gmnpsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_multiclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final MulticlassLabels trainLabels = new MulticlassLabels(trainLabelValues);

        final GMNPSVM machine = new GMNPSVM(svmC, gaussKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // The kernel is re-initialized on (train, test) and the test features are
        // also passed to apply(), exactly as in the original example.
        gaussKernel.init(trainFeatures, testFeatures);
        final MulticlassLabels predicted = obtain_from_generic(machine.apply(testFeatures));
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a two-class support vector machine classifier is trained on a
// toy data set and the trained classifier is then used to predict labels of test
// examples. As training algorithm Gradient Projection Decomposition Technique
// (GPDT) is used with SVM regularization parameter C=1 and a Gaussian
// kernel of width 2.1. The solver returns an epsilon-precise (epsilon=1e-5) solution.
//
// For more details on GPDT solver see http://dm.unife.it/gpdt .
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a {@code GPBTSVM} with a Gaussian kernel on the two-class toy data and
 * prints the labels predicted for the test features.
 */
public class classifier_gpbtsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final GPBTSVM machine = new GPBTSVM(svmC, gaussKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        gaussKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// This example shows usage of a k-nearest neighbor (KNN) classification rule on
// a toy data set. The number of the nearest neighbors is set to k=3 and the distances
// are measured by the Euclidean metric. Finally, the KNN rule is applied to predict
// labels of test examples.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
/**
 * Builds a {@code KNN} classifier (k = 3, Euclidean distance) from the
 * multiclass toy data and prints the labels predicted for the test features.
 */
public class classifier_knn_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int neighbours = 3;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_multiclass.dat");

        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        final EuclideanDistance metric = new EuclideanDistance(trainFeatures, trainFeatures);
        final MulticlassLabels trainLabels = new MulticlassLabels(trainLabelValues);

        final KNN classifier = new KNN(neighbours, metric, trainLabels);
        classifier.train();

        final MulticlassLabels predicted = obtain_from_generic(classifier.apply(testFeatures));
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a multi-class support vector machine classifier is trained on a
// toy data set and the trained classifier is then used to predict labels of test
// examples. As training algorithm the LaRank algorithm is used with SVM
// regularization parameter C=1 and a Gaussian kernel of width 2.1 and a precision
// set to epsilon=1e-5.
//
// For more details on LaRank see
// Bordes, A. and Bottou, L. and Gallinari, P. and Weston, J.
// Solving MultiClass Support Vector Machines with LaRank. ICML 2007.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
/**
 * Trains a {@code LaRank} multiclass SVM with a Gaussian kernel on the toy data
 * and prints the labels predicted for the test features.
 */
public class classifier_larank_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted test labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        double width = 2.1;
        double epsilon = 1e-5;
        double C = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        DoubleMatrix trainlab = Load.load_labels("../data/label_train_multiclass.dat");

        RealFeatures feats_train = new RealFeatures();
        feats_train.set_feature_matrix(traindata_real);
        RealFeatures feats_test = new RealFeatures();
        feats_test.set_feature_matrix(testdata_real);

        GaussianKernel kernel = new GaussianKernel(feats_train, feats_train, width);
        MulticlassLabels labels = new MulticlassLabels(trainlab);

        LaRank svm = new LaRank(C, kernel, labels);
        svm.set_batch_mode(false);
        svm.set_epsilon(epsilon);
        svm.train();

        // Predict on the test features: the example is documented as labelling
        // test examples, and feats_test was previously built but never used
        // (the training features were classified instead).
        DoubleMatrix out_labels = obtain_from_generic(svm.apply(feats_test)).get_labels();
        System.out.println(out_labels.toString());

        modshogun.exit_shogun();
    }
}
// In this example a two-class linear classifier based on the Linear Discriminant
// Analysis (LDA) is trained on a toy data set and then the trained classifier is
// used to predict test examples. The regularization parameter, which corresponds
// to a weight of a unitary matrix added to the covariance matrix, is set to
// gamma=3.
//
// For more details on the LDA see e.g.
// http://en.wikipedia.org/wiki/Linear_discriminant_analysis
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains an {@code LDA} classifier (gamma = 3) on the two-class toy data,
 * prints its bias and weight vector, and then prints the labels predicted for
 * the test features.
 */
public class classifier_lda_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; bias, weights and predicted labels go to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int regularization = 3;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);
        final LDA classifier = new LDA(regularization, trainFeatures, trainLabels);
        classifier.train();

        // Report the learned parameters before switching to the test features.
        System.out.println(classifier.get_bias());
        final String weights = classifier.get_w().toString();
        System.out.println(weights);

        classifier.set_features(testFeatures);
        final BinaryLabels predicted = obtain_from_generic(classifier.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a two-class linear support vector machine classifier is trained
// on a toy data set and the trained classifier is then used to predict labels of
// test examples. As training algorithm the LIBLINEAR solver is used with the SVM
// regularization parameter C=0.9, the bias in the classification rule switched
// on and the precision parameter epsilon=1e-3.
//
// For more details on LIBLINEAR see
// http://www.csie.ntu.edu.tw/~cjlin/liblinear/
import org.shogun.*;
import org.jblas.*;
import static org.shogun.LIBLINEAR_SOLVER_TYPE.L2R_L2LOSS_SVC_DUAL;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a {@code LibLinear} machine (solver type {@code L2R_L2LOSS_SVC_DUAL},
 * bias enabled) on the two-class toy data and prints the labels predicted for
 * the test features.
 */
public class classifier_liblinear_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double svmC = 0.9;
        final double precision = 1e-3;

        // Fixed seed for Shogun's random number generator.
        org.shogun.Math.init_random(17);

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);
        final LibLinear machine = new LibLinear(svmC, trainFeatures, trainLabels);
        machine.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL);
        machine.set_epsilon(precision);
        machine.set_bias_enabled(true);
        machine.train();

        // Swap in the test features so that apply() predicts on them.
        machine.set_features(testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a two-class support vector machine classifier is trained on a
// 2-dimensional randomly generated data set and the trained classifier is used to
// predict labels of test examples. As training algorithm the LIBSVM solver is used
// with SVM regularization parameter C=1 and a Gaussian kernel of width 2.1.
//
// For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
import org.shogun.*;
import org.jblas.*;
import static org.jblas.MatrixFunctions.signum;
import static org.jblas.DoubleMatrix.concatHorizontally;
import static org.jblas.DoubleMatrix.ones;
import static org.jblas.DoubleMatrix.randn;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Minimal LibSVM example: generates two shifted 2-row random point sets for
 * training and testing, trains a {@code LibSVM} with a Gaussian kernel and
 * prints the mean disagreement between predicted signs and the test labels.
 */
public class classifier_libsvm_minimal_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and prints a line starting with "Mean Error = ".
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int pointsPerClass = 1000;
        final double shift = 1.0;
        final double kernelWidth = 2.1;
        final double svmC = 1.0;

        final DoubleMatrix offset = ones(2, pointsPerClass).mmul(shift);

        // Arguments are evaluated left to right, so the four randn() draws happen
        // in the same order as before: train-negative, train-positive,
        // test-negative, test-positive.
        final DoubleMatrix trainMatrix = concatHorizontally(
            randn(2, pointsPerClass).sub(offset),
            randn(2, pointsPerClass).add(offset));
        final DoubleMatrix testMatrix = concatHorizontally(
            randn(2, pointsPerClass).sub(offset),
            randn(2, pointsPerClass).add(offset));

        // Labels: -1 for the first half of the columns, +1 for the second half.
        final DoubleMatrix positives = ones(1, pointsPerClass);
        final DoubleMatrix trainLabelValues = concatHorizontally(positives.neg(), positives);
        final DoubleMatrix testLabelValues = concatHorizontally(positives.neg(), positives);

        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);
        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final LibSVM machine = new LibSVM(svmC, gaussKernel, trainLabels);
        machine.train();

        final BinaryLabels predicted = obtain_from_generic(machine.apply(testFeatures));
        final DoubleMatrix predictedValues = predicted.get_labels();
        System.out.println("Mean Error = " + signum(predictedValues).ne(testLabelValues).mean());

        modshogun.exit_shogun();
    }
}
// In this example a two-class support vector machine classifier is trained on a
// toy data set and the trained classifier is used to predict labels of test
// examples. As training algorithm the LIBSVM solver is used with SVM
// regularization parameter C=1 and a Gaussian kernel of width 2.1 and the
// precision parameter epsilon=1e-5. The example also shows how to retrieve the
// support vectors from the train SVM model.
//
// For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a {@code LibSVM} with a Gaussian kernel on the two-class toy data and
 * prints the labels predicted for the test features.
 */
public class classifier_libsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final LibSVM machine = new LibSVM(svmC, gaussKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        gaussKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a one-class support vector machine classifier is trained on a
// toy data set. The training algorithm finds a hyperplane in the RKHS which
// separates the training data from the origin. The one-class classifier is
// typically used to estimate the support of a high-dimensional distribution.
// For more details see e.g.
// B. Schoelkopf et al. Estimating the support of a high-dimensional
// distribution. Neural Computation, 13, 2001, 1443-1471.
//
// In the example, the one-class SVM is trained by the LIBSVM solver with the
// regularization parameter C=1 and the Gaussian kernel of width 2.1 and the
// precision parameter epsilon=1e-5.
//
// For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a {@code LibSVMOneClass} machine with a Gaussian kernel on the toy
 * training data (no labels are supplied) and prints its output for the test
 * features.
 */
public class classifier_libsvmoneclass_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the resulting labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);

        final LibSVMOneClass machine = new LibSVMOneClass(svmC, gaussKernel);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        gaussKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a two-class support vector machine classifier is trained on a
// toy data set and the trained classifier is used to predict labels of test
// examples. As training algorithm the Minimal Primal Dual SVM is used with SVM
// regularization parameter C=1 and a Gaussian kernel of width 2.1 and the
// precision parameter 1e-5.
//
// For more details on the MPD solver see
// Kienzle, W. and B. Schölkopf: Training Support Vector Machines with Multiple
// Equality Constraints. Machine Learning: ECML 2005, 182-193. (Eds.) Carbonell,
// J. G., J. Siekmann, Springer, Berlin, Germany (11 2005)
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains an {@code MPDSVM} with a Gaussian kernel on the two-class toy data and
 * prints the labels predicted for the test features.
 */
public class classifier_mpdsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final MPDSVM machine = new MPDSVM(svmC, gaussKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        gaussKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
/**
 * Trains a {@code MulticlassLibSVM} with a Gaussian kernel on the multiclass
 * toy data and prints the labels predicted for the test features.
 */
public class classifier_multiclasslibsvm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.1;
        final double precision = 1e-5;
        final double svmC = 1.0;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_multiclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        final MulticlassLabels trainLabels = new MulticlassLabels(trainLabelValues);

        final MulticlassLibSVM machine = new MulticlassLibSVM(svmC, gaussKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        gaussKernel.init(trainFeatures, testFeatures);
        final MulticlassLabels predicted = obtain_from_generic(machine.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// This example shows usage of the Perceptron algorithm for training a two-class
// linear classifier, i.e. y = sign( <x,w>+b). The Perceptron algorithm works by
// iteratively passing through the training examples and applying the update rule on
// those examples which are misclassified by the current classifier. The Perceptron
// update rule reads
//
// w(t+1) = w(t) + alpha * y_t * x_t
// b(t+1) = b(t) + alpha * y_t
//
// where (x_t,y_t) is feature vector and label (must be +1/-1) of the misclassified example
// (w(t),b(t)) are the current parameters of the linear classifier
// (w(t+1),b(t+1)) are the new parameters of the linear classifier
// alpha is the learning rate; in this example alpha=1
//
// The Perceptron algorithm iterates until all training examples are correctly
// classified or the prescribed maximal number of iterations, in this example
// max_iter=1000, is reached.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a {@code Perceptron} (learning rate 1.0, at most 1000 iterations) on
 * the two-class toy data and prints the labels predicted for the test features.
 */
public class classifier_perceptron_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example and writes the predicted labels to standard output.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double learningRate = 1.0;
        final int maxIterations = 1000;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        final RealFeatures trainFeatures = new RealFeatures();
        trainFeatures.set_feature_matrix(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures();
        testFeatures.set_feature_matrix(testMatrix);

        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);
        final Perceptron classifier = new Perceptron(trainFeatures, trainLabels);
        classifier.set_learn_rate(learningRate);
        classifier.set_max_iter(maxIterations);
        classifier.train();

        // Swap in the test features so that apply() predicts on them.
        classifier.set_features(testFeatures);
        final BinaryLabels predicted = obtain_from_generic(classifier.apply());
        final DoubleMatrix predictedValues = predicted.get_labels();
        final String rendered = predictedValues.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to train an SVMLight classifier
// using a custom linear term. This is used in the class DASVM that
// pre-computes this linear term using a previously trained SVM.
//
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains an {@code SVMLight} on DNA strings with a weighted degree string
 * kernel and an explicitly supplied linear term, then applies it to the test
 * strings.
 */
public class classifier_svmlight_linear_term_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the predicted labels are computed but neither stored
     * nor printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        final int kernelDegree = 20;
        modshogun.init_shogun_with_defaults();

        final double svmC = 0.9;
        final double precision = 1e-3;
        // Declared but not applied: the call that would pass it to the SVM is disabled.
        final int numThreads = 1;

        final String[] trainSequences = {
            "CGCACGTACGTAGCTCGAT",
            "CGACGTAGTCGTAGTCGTA",
            "CGACGGGGGGGGGGTCGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGACCACAGTTATATAGTA",
            "CGACGTAGTCGTAGTCGTA",
            "CGACGTAGTTTTTTTCGTA",
            "CGACGTAGTCGTAGCCCCA",
            "CAAAAAAAAAAAAAAAATA",
            "CGACGGGGGGGGGGGCGTA"
        };
        final String[] testSequences = {
            "AGCACGTACGTAGCTCGAT",
            "AGACGTAGTCGTAGTCGTA",
            "CAACGGGGGGGGGGTCGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGAACACAGTTATATAGTA",
            "CGACCTAGTCGTAGTCGTA",
            "CGACGTGGGGTTTTTCGTA",
            "CGACGTAGTCCCAGCCCCA",
            "CAAAAAAAAAAAACCAATA",
            "CGACGGCCGGGGGGGCGTA"
        };

        final StringCharFeatures trainFeatures = new StringCharFeatures(trainSequences, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testSequences, DNA);
        final WeightedDegreeStringKernel wdKernel =
            new WeightedDegreeStringKernel(trainFeatures, trainFeatures, kernelDegree);

        // One label per training string: five negatives followed by five positives.
        final double[][] trainLabelRow = {{-1,-1,-1,-1,-1,1,1,1,1,1}};
        final BinaryLabels trainLabels = new BinaryLabels(new DoubleMatrix(trainLabelRow));

        // Custom linear term, one entry per training example.
        final double[][] linearTermRow = {{-1,-2,-3,-4,-5,-6,-7,-8,-7,-6}};

        final SVMLight machine = new SVMLight(svmC, wdKernel, trainLabels);
        machine.set_qpsize(3);
        machine.set_linear_term(new DoubleMatrix(linearTermRow));
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        wdKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        predicted.get_labels();

        modshogun.exit_shogun();
    }
}
// In this example a two-class support vector machine classifier is trained on a
// DNA splice-site detection data set and the trained classifier is used to predict
// labels on test set. As training algorithm SVM^light is used with SVM
// regularization parameter C=1.1 and the Weighted Degree kernel of degree 20 and
// the precision parameter epsilon=1e-5.
//
// For more details on the SVM^light see
// T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
// Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
//
// For more details on the Weighted Degree kernel see
// G. Raetsch, S.Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
// spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005.
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains an {@code SVMLight} with a weighted degree string kernel on the DNA
 * data files and applies it to the DNA test strings.
 */
public class classifier_svmlight_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the predicted labels are computed but neither stored
     * nor printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        final int kernelDegree = 20;
        modshogun.init_shogun_with_defaults();

        final double svmC = 1.1;
        final double precision = 1e-5;
        // Declared but not applied: the call that would pass it to the SVM is disabled.
        final int numThreads = 1;

        final String[] trainSequences = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testSequences = Load.load_dna("../data/fm_test_dna.dat");

        final StringCharFeatures trainFeatures = new StringCharFeatures(trainSequences, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testSequences, DNA);

        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_dna.dat");
        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final WeightedDegreeStringKernel wdKernel =
            new WeightedDegreeStringKernel(trainFeatures, trainFeatures, kernelDegree);

        final SVMLight machine = new SVMLight(svmC, wdKernel, trainLabels);
        machine.set_epsilon(precision);
        machine.train();

        // Re-initialize the kernel on (train, test) so the argument-less apply()
        // evaluates the test examples.
        wdKernel.init(trainFeatures, testFeatures);
        final BinaryLabels predicted = obtain_from_generic(machine.apply());
        predicted.get_labels();

        modshogun.exit_shogun();
    }
}
// In this example an agglomerative hierarchical single linkage clustering method
// is used to cluster a given toy data set. Starting with each object being
// assigned to its own cluster, clusters are iteratively merged. In each step the
// two clusters whose closest elements have the minimum distance (here measured
// via the Euclidean distance object) are merged.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Runs {@code Hierarchical} clustering with three merges and a Euclidean
 * distance on the toy training data and fetches the merge distances and
 * cluster pairs.
 */
public class clustering_hierarchical_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the clustering results are retrieved but not printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int mergeCount = 3;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final EuclideanDistance metric = new EuclideanDistance(trainFeatures, trainFeatures);

        final Hierarchical clusterer = new Hierarchical(mergeCount, metric);
        clusterer.train();

        final DoubleMatrix mergeDistances = clusterer.get_merge_distances();
        final DoubleMatrix clusterPairs = clusterer.get_cluster_pairs();

        modshogun.exit_shogun();
    }
}
// In this example the k-means clustering method is used to cluster a given toy
// data set. In k-means clustering one tries to partition n observations into k
// clusters in which each observation belongs to the cluster with the nearest mean.
// The algorithm class constructor takes the number of clusters and a distance to
// be used as input. The distance used in this example is Euclidean distance.
// After training one can fetch the result of clustering by obtaining the cluster
// centers and their radiuses.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.Math.init_random;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Runs {@code KMeans} with k = 3 and a Euclidean distance on the toy training
 * data, then fetches the cluster centers and radiuses.
 */
public class clustering_kmeans_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the clustering results are retrieved but not printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int clusterCount = 3;
        // Fixed seed for Shogun's random number generator, set before any data is loaded.
        init_random(17);

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final EuclideanDistance metric = new EuclideanDistance(trainFeatures, trainFeatures);

        final KMeans clusterer = new KMeans(clusterCount, metric);
        clusterer.train();

        final DoubleMatrix centers = clusterer.get_cluster_centers();
        // The radiuses are requested as well; the returned value is discarded.
        clusterer.get_radiuses();

        modshogun.exit_shogun();
    }
}
// In this example toy data is being processed using the Isomap algorithm
// as described in
//
// Silva, V. D., & Tenenbaum, J. B. (2003).
// Global versus local methods in nonlinear dimensionality reduction.
// Advances in Neural Information Processing Systems 15, 15(Figure 2), 721-728. MIT Press.
// Retrieved from http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.9.3407&rep=rep1&type=pdf
//
// Before applying to the data the landmark approximation can be enabled with a
// specified number of landmarks; this example leaves it disabled. The landmark
// approximation is described in
//
// Sparse multidimensional scaling using landmark points
// V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
//
// The k parameter -- the number of neighbors in the k nearest neighbor
// graph -- is also initialized before the embedding is computed.
import org.shogun.*;
import org.jblas.*;
/**
 * Embeds the toy training data with {@code Isomap} using target dimension 1,
 * k = 6 and the landmark option switched off.
 */
public class converter_isomap_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the example; the embedding is computed but not printed.
     *
     * @param argv command-line arguments (not read)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix inputMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures inputFeatures = new RealFeatures(inputMatrix);

        final Isomap converter = new Isomap();
        converter.set_target_dim(1);
        converter.set_k(6);
        converter.set_landmark(false);

        final RealFeatures embedded = converter.embed(inputFeatures);

        modshogun.exit_shogun();
    }
}
# In this example toy data is being preprocessed using the Locally Linear Embedding (LLE)
# algorithm as described in
#
# Saul, L. K., Ave, P., Park, F., & Roweis, S. T. (2001).
# An Introduction to Locally Linear Embedding. Available from, 290(5500), 2323-2326.
# Retrieved from: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7319&rep=rep1&type=pdf
#
# The number of neighbors used during the linear reconstruction step of the algorithm is set
# before processing of the data.
#!/usr/bin/env python
from tools.load import LoadMatrix
# Loader helper used to read the example data file.
lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')
# Each entry is one [data, k] argument list; the first one is unpacked
# into the example function when this file is run as a script.
parameter_list = [[data,10],[data,20]]
def converter_locallylinearembedding_modular(data, k):
    """Run a LocallyLinearEmbedding converter over ``data``.

    The converter is configured with target dimension 1 and ``k`` as its
    k parameter, then applied to RealFeatures built from ``data``.

    Returns the RealFeatures object that was constructed from ``data``.
    """
    from shogun.Features import RealFeatures
    from shogun.Converter import LocallyLinearEmbedding

    features = RealFeatures(data)

    lle = LocallyLinearEmbedding()
    lle.set_target_dim(1)
    lle.set_k(k)

    # NOTE(review): the value returned by apply() is not captured, so the
    # object returned below is the input feature object -- confirm whether
    # the embedding itself was meant to be returned.
    lle.apply(features)
    return features
if __name__ == '__main__':
    # Script entry point: announce the example, then run it with the first
    # argument list from parameter_list.
    print('LocallyLinearEmbedding')
    first_case = parameter_list[0]
    converter_locallylinearembedding_modular(*first_case)
// In this example toy data is being processed using the multidimensional
// scaling as described on p.261 (Section 12.1) of
//
// Borg, I., & Groenen, P. J. F. (2005).
// Modern multidimensional scaling: Theory and applications. Springer.
//
// Before processing the landmark approximation is disabled.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: embedding a real-valued feature matrix with multidimensional
 * scaling.
 */
public class converter_multidimensionalscaling_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Configures a MultidimensionalScaling converter (target dimension 1,
     * landmark flag set to false) and embeds the training features.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures inputFeatures = new RealFeatures(trainMatrix);

        final MultidimensionalScaling scaler = new MultidimensionalScaling();
        scaler.set_target_dim(1);
        scaler.set_landmark(false);

        // The returned embedding is discarded by this example.
        scaler.embed(inputFeatures);

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// matrix between these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
//
// Obviously, using the Bray Curtis distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Bray-Curtis distance matrices between real-valued feature sets.
 */
public class distance_braycurtis_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final BrayCurtisDistance metric = new BrayCurtisDistance(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance (dissimilarity ratio) matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// matrix between these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CCanberraMetric.html.
//
// Obviously, using the Canberra distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Canberra metric distance matrices between real-valued feature sets.
 */
public class distance_canberra_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final CanberraMetric metric = new CanberraMetric(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// This example shows how to compute the Canberra Word Distance.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Example: Canberra word distance matrices between DNA string feature sets.
 */
public class distance_canberraword_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Converts DNA strings to sorted word features and computes the
     * train/train and train/test distance matrices (neither is printed).
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;
        // Second argument passed to obtain_from_char for both feature sets.
        final int start = order - 1;

        final String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        final StringCharFeatures trainChars = new StringCharFeatures(DNA);
        trainChars.set_features(trainDna);
        final StringWordFeatures trainWords = new StringWordFeatures(trainChars.get_alphabet());
        trainWords.obtain_from_char(trainChars, start, order, gap, reverse);

        // One preprocessor instance, initialized on the training words and
        // attached to both feature sets.
        final SortWordString sorter = new SortWordString();
        sorter.init(trainWords);
        trainWords.add_preprocessor(sorter);
        trainWords.apply_preprocessor();

        final StringCharFeatures testChars = new StringCharFeatures(DNA);
        testChars.set_features(testDna);
        // Note: the alphabet is taken from the training character features here.
        final StringWordFeatures testWords = new StringWordFeatures(trainChars.get_alphabet());
        testWords.obtain_from_char(testChars, start, order, gap, reverse);
        testWords.add_preprocessor(sorter);
        testWords.apply_preprocessor();

        final CanberraWordDistance metric = new CanberraWordDistance(trainWords, trainWords);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        metric.init(trainWords, testWords);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance (maximum of absolute feature dimension differences) matrix is
// computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// (maximum of absolute feature dimension differences) matrix between these
// two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CChebyshewMetric.html.
//
// Obviously, using the Chebyshew distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Chebyshew metric distance matrices between real-valued feature sets.
 */
public class distance_chebyshew_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final ChebyshewMetric metric = new ChebyshewMetric(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// matrix between these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CChiSquareDistance.html.
//
// Obviously, using the ChiSquare distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: chi-square distance matrices between real-valued feature sets.
 */
public class distance_chisquare_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final ChiSquareDistance metric = new ChiSquareDistance(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// matrix between these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CCosineDistance.html.
//
// Obviously, using the Cosine distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: cosine distance matrices between real-valued feature sets.
 */
public class distance_cosine_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final CosineDistance metric = new CosineDistance(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Euclidean distance matrices between real-valued feature sets.
 */
public class distance_euclidean_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final EuclideanDistance metric = new EuclideanDistance(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a
// pairwise distance (shortest path on a sphere) matrix is computed
// by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// (shortest path on a sphere) matrix between these two data sets is
// computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CGeodesicMetric.html.
//
// Obviously, using the Geodesic distance is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: geodesic metric distance matrices between real-valued feature sets.
 */
public class distance_geodesic_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final GeodesicMetric metric = new GeodesicMetric(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// This example shows how to compute the Hamming Word Distance for string features.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Example: Hamming word distance matrices between DNA string feature sets.
 */
public class distance_hammingword_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Converts DNA strings to sorted word features and computes the
     * train/train and train/test distance matrices (neither is printed).
     *
     * Only the DNA string files are read; no real-valued matrix is needed
     * for this distance.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;
        // Third constructor argument of HammingWordDistance.
        final boolean use_sign = false;

        final String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");

        final StringCharFeatures charfeat = new StringCharFeatures(DNA);
        charfeat.set_features(fm_train_dna);
        final StringWordFeatures feats_train = new StringWordFeatures(charfeat.get_alphabet());
        feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        // One preprocessor instance, initialized on the training words and
        // attached to both feature sets.
        final SortWordString preproc = new SortWordString();
        preproc.init(feats_train);
        feats_train.add_preprocessor(preproc);
        feats_train.apply_preprocessor();

        final StringCharFeatures charfeat_test = new StringCharFeatures(DNA);
        charfeat_test.set_features(fm_test_dna);
        // Note: the alphabet is taken from the training character features here.
        final StringWordFeatures feats_test = new StringWordFeatures(charfeat.get_alphabet());
        feats_test.obtain_from_char(charfeat_test, order - 1, order, gap, reverse);
        feats_test.add_preprocessor(preproc);
        feats_test.apply_preprocessor();

        final HammingWordDistance distance =
                new HammingWordDistance(feats_train, feats_train, use_sign);
        final DoubleMatrix dm_train = distance.get_distance_matrix();

        // Re-initialize with the test features as second argument.
        distance.init(feats_train, feats_test);
        final DoubleMatrix dm_test = distance.get_distance_matrix();

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance (divergence measure based on the Kullback-Leibler divergence) matrix
// is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// (divergence measure based on the Kullback-Leibler divergence) matrix between
// these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CJensenMetric.html.
//
// Obviously, using the Jensen-Shannon distance/divergence is not limited to
// this showcase example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Jensen metric distance matrices between real-valued feature sets.
 */
public class distance_jensen_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final JensenMetric metric = new JensenMetric(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// This example shows how to compute the Manhattan Distance.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Manhattan metric distance matrices between real-valued feature sets.
 */
public class distance_manhatten_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final ManhattanMetric metric = new ManhattanMetric(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// This example shows how to compute the Manhattan Word Distance for string features.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Example: Manhattan word distance matrices between DNA string feature sets.
 */
public class distance_manhattenword_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Converts DNA strings to sorted word features and computes the
     * train/train and train/test distance matrices (neither is printed).
     *
     * Only the DNA string files are read; no real-valued matrix is needed
     * for this distance.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;

        final String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");

        final StringCharFeatures charfeat = new StringCharFeatures(DNA);
        charfeat.set_features(fm_train_dna);
        final StringWordFeatures feats_train = new StringWordFeatures(charfeat.get_alphabet());
        feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        // One preprocessor instance, initialized on the training words and
        // attached to both feature sets.
        final SortWordString preproc = new SortWordString();
        preproc.init(feats_train);
        feats_train.add_preprocessor(preproc);
        feats_train.apply_preprocessor();

        final StringCharFeatures charfeat_test = new StringCharFeatures(DNA);
        charfeat_test.set_features(fm_test_dna);
        // Note: the alphabet is taken from the training character features here.
        final StringWordFeatures feats_test = new StringWordFeatures(charfeat.get_alphabet());
        feats_test.obtain_from_char(charfeat_test, order - 1, order, gap, reverse);
        feats_test.add_preprocessor(preproc);
        feats_test.apply_preprocessor();

        final ManhattanWordDistance distance =
                new ManhattanWordDistance(feats_train, feats_train);
        final DoubleMatrix dm_train = distance.get_distance_matrix();

        // Re-initialize with the test features as second argument.
        distance.init(feats_train, feats_test);
        final DoubleMatrix dm_test = distance.get_distance_matrix();

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) and norm 'k' controls the processing of the given data points,
// where a pairwise distance matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// matrix between these two data sets is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CMinkowskiMetric.html.
//
// Obviously, using the Minkowski metric is not limited to this showcase
// example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Minkowski metric distance matrices between real-valued feature sets.
 */
public class distance_minkowski_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices with the
     * metric parameter set to 3 and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        // Third constructor argument of MinkowskiMetric.
        final double norm = 3;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final MinkowskiMetric metric = new MinkowskiMetric(trainFeatures, trainFeatures, norm);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// In this example a squared Euclidean distance is computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Euclidean distance matrices with the square root disabled.
 */
public class distance_normsquared_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices with
     * set_disable_sqrt(true) applied, and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        final EuclideanDistance metric = new EuclideanDistance(trainFeatures, trainFeatures);
        // Set before either matrix is requested.
        metric.set_disable_sqrt(true);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// An approach as applied below, which shows the processing of input data
// from a file becomes a crucial factor for writing your own sample applications.
// This approach is just one example of what can be done using the distance
// functions provided by shogun.
//
// First, you need to determine what type your data will be, because this
// will determine the distance function you can use.
//
// This example loads two stored matrices of real values from different
// files and initializes the matrices to 'RealFeatures'.
// Each column of the matrices corresponds to one data point.
//
// The distance initialized by two data sets (the same data set as shown in the
// first call) controls the processing of the given data points, where a pairwise
// distance (extended Jaccard coefficient) matrix is computed by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// The method call 'init'* binds the given data sets, where a pairwise distance
// (extended Jaccard coefficient) matrix between these two data sets is computed
// by 'get_distance_matrix'.
//
// The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
//
// *Note that the previous computed distance matrix can no longer be
// reaccessed by 'get_distance_matrix'.
//
// For more details see doc/classshogun_1_1CTanimotoDistance.html.
//
// Obviously, using the Tanimoto distance/coefficient is not limited to
// this showcase example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: Tanimoto distance matrices between real-valued feature sets.
 */
public class distance_tanimoto_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the train/train and train/test distance matrices and prints both.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // First pass: the training features are used for both arguments.
        final TanimotoDistance metric = new TanimotoDistance(trainFeatures, trainFeatures);
        final DoubleMatrix trainDistances = metric.get_distance_matrix();

        // Second pass: re-initialize with the test features as second argument.
        metric.init(trainFeatures, testFeatures);
        final DoubleMatrix testDistances = metric.get_distance_matrix();

        System.out.println(trainDistances.toString());
        System.out.println(testDistances.toString());

        modshogun.exit_shogun();
    }
}
// In this example the Histogram algorithm object computes a histogram over all
// 16bit unsigned integers in the features.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: training a Histogram on word features derived from DNA strings.
 */
public class distribution_histogram_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Builds word features from the DNA training strings, trains a
     * Histogram on them and prints the resulting histogram.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int order = 3;
        final int gap = 4;
        // Second argument passed to obtain_from_char.
        final int start = order - 1;

        final String[] dnaStrings = Load.load_dna("../data/fm_train_dna.dat");
        final StringCharFeatures charFeatures = new StringCharFeatures(dnaStrings, DNA);
        final StringWordFeatures wordFeatures = new StringWordFeatures(charFeatures.get_alphabet());
        wordFeatures.obtain_from_char(charFeatures, start, order, gap, reverse);

        final Histogram model = new Histogram(wordFeatures);
        model.train();

        final DoubleMatrix counts = model.get_histogram();
        System.out.println(counts);

        // Additional model queries, left disabled in this example:
        //int num_examples = feats.get_num_vectors();
        //int num_param = histo.get_num_model_parameters();
        //DoubleMatrix out_likelihood = histo.get_log_likelihood();
        //double out_sample = histo.get_log_likelihood_sample();

        modshogun.exit_shogun();
    }
}
// In this example a hidden markov model with 1 state and 512 emission symbols is
// trained on a string data set. After calling the constructor of the HMM class
// specifying the number of states and emissions the model is trained. Via the Baum-Welch
// algorithm the optimal transition and emission probabilities are estimated. The
// best path, i.e. the path with highest probability given the model can then be
// calculated using get_best_path_state.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.CUBE;
import static org.shogun.BaumWelchViterbiType.BW_NORMAL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: training an HMM on word features derived from cube strings and
 * querying derivatives, best paths and likelihoods.
 */
public class distribution_hmm_modular {
    // Loads the native modshogun library when this class is initialized.
    static {
        System.loadLibrary("modshogun");
    }

    /**
     * Builds word features from the cube training strings, trains the HMM,
     * then walks over every example to query log derivatives and best-path
     * values. Nothing is printed.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        // N and M are handed straight to the HMM constructor.
        // NOTE(review): presumably the state count and the number of
        // emission symbols -- confirm against the HMM API.
        final int N = 1;
        final int M = 512;
        final double pseudo = 1e-5;
        final int order = 3;
        final int gap = 0;

        final String[] cubeStrings = Load.load_cubes("../data/fm_train_cube.dat");
        final StringCharFeatures charFeatures = new StringCharFeatures(cubeStrings, CUBE);
        final StringWordFeatures wordFeatures = new StringWordFeatures(charFeatures.get_alphabet());
        wordFeatures.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

        final HMM model = new HMM(wordFeatures, N, M, pseudo);
        model.train();
        model.baum_welch_viterbi_train(BW_NORMAL);

        final int exampleCount = wordFeatures.get_num_vectors();
        final int paramCount = model.get_num_model_parameters();

        // Query every (parameter, example) derivative; the values are discarded.
        for (int example = 0; example < exampleCount; example++) {
            for (int param = 0; param < paramCount; param++) {
                model.get_log_derivative(param, example);
            }
        }

        // Running totals over all examples; accumulated but never reported.
        int pathTotal = 0;
        int pathStateTotal = 0;
        for (int example = 0; example < exampleCount; example++) {
            pathTotal += model.best_path(example);
            for (int state = 0; state < N; state++) {
                pathStateTotal += model.get_best_path_state(example, state);
            }
        }

        final DoubleMatrix perExampleLikelihood = model.get_log_likelihood();
        final double sampleLikelihood = model.get_log_likelihood_sample();

        modshogun.exit_shogun();
    }
}
// Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due to
// the structure of the Markov chain it is very similar to a HMM with just one
// chain of connected hidden states - that is why we termed this linear HMM.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: trains a LinearHMM on word features derived from DNA strings
 * and queries its transition probabilities, log-derivatives and likelihoods.
 */
public class distribution_linearhmm_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Runs the linear HMM example end to end.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        final boolean reverse = false;
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 4;

        String[] dnaStrings = Load.load_dna("../data/fm_train_dna.dat");
        StringCharFeatures charFeatures = new StringCharFeatures(dnaStrings, DNA);
        StringWordFeatures wordFeatures = new StringWordFeatures(charFeatures.get_alphabet());
        wordFeatures.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

        LinearHMM model = new LinearHMM(wordFeatures);
        model.train();
        model.get_transition_probs();

        int exampleCount = wordFeatures.get_num_vectors();
        int paramCount = model.get_num_model_parameters();

        // Query every (parameter, example) log-derivative; results are discarded.
        for (int example = 0; example < exampleCount; example++) {
            for (int param = 0; param < paramCount; param++) {
                model.get_log_derivative(param, example);
            }
        }

        DoubleMatrix perExampleLikelihood = model.get_log_likelihood();
        double sampleLikelihood = model.get_log_likelihood_sample();

        modshogun.exit_shogun();
    }
}
// In this example usage of the Positional PWM is shown
import org.shogun.*;
import org.jblas.*;
import static org.jblas.MatrixFunctions.logi;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: configures a PositionalPWM with a fixed 4x4 matrix and computes w.
 */
public class distribution_ppwm_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds word features from DNA strings, then sets up a PositionalPWM
     * (sigma, mean, log of the PWM matrix) and retrieves the computed w matrix.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        final boolean reverse = false;
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 4;

        String[] dnaStrings = Load.load_dna("../data/fm_train_dna.dat");
        StringCharFeatures charFeatures = new StringCharFeatures(dnaStrings, DNA);
        StringWordFeatures wordFeatures = new StringWordFeatures(charFeatures.get_alphabet());
        wordFeatures.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

        PositionalPWM model = new PositionalPWM();
        model.set_sigma(5.0);
        model.set_mean(10.0);

        double[][] pwmRows = {
            {0.0, 0.5, 0.1, 1.0},
            {0.0, 0.5, 0.5, 0.0},
            {1.0, 0.0, 0.4, 0.0},
            {0.0, 0.0, 0.0, 0.0}
        };
        DoubleMatrix pwmMatrix = new DoubleMatrix(pwmRows);

        // The model is given the element-wise logarithm of the matrix.
        model.set_pwm(logi(pwmMatrix));
        model.compute_w(20);
        DoubleMatrix w = model.get_w();

        modshogun.exit_shogun();
    }
}
// In this example various (accuracy, error rate, ..) measures are being computed
// for the pair of ground truth toy data and random data.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: evaluates random predictions against two-class ground truth
 * labels with each of the contingency-table based measures.
 */
public class evaluation_contingencytableevaluation_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Computes nine measures and prints them on one comma-separated line.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        DoubleMatrix truth = Load.load_labels("../data/label_train_twoclass.dat");
        // One random prediction per ground truth label.
        DoubleMatrix guess = randn(1, truth.getLength());

        BinaryLabels truthLabels = new BinaryLabels(truth);
        BinaryLabels guessLabels = new BinaryLabels(guess);

        // The base evaluator is run once; its result is not used.
        ContingencyTableEvaluation baseEvaluator = new ContingencyTableEvaluation();
        baseEvaluator.evaluate(guessLabels, truthLabels);

        AccuracyMeasure accuracyMeasure = new AccuracyMeasure();
        double accuracy = accuracyMeasure.evaluate(guessLabels, truthLabels);

        ErrorRateMeasure errorRateMeasure = new ErrorRateMeasure();
        double errorrate = errorRateMeasure.evaluate(guessLabels, truthLabels);

        BALMeasure balMeasure = new BALMeasure();
        double bal = balMeasure.evaluate(guessLabels, truthLabels);

        WRACCMeasure wraccMeasure = new WRACCMeasure();
        double wracc = wraccMeasure.evaluate(guessLabels, truthLabels);

        F1Measure f1Measure = new F1Measure();
        double f1 = f1Measure.evaluate(guessLabels, truthLabels);

        CrossCorrelationMeasure crossCorrelationMeasure = new CrossCorrelationMeasure();
        double crosscorrelation = crossCorrelationMeasure.evaluate(guessLabels, truthLabels);

        RecallMeasure recallMeasure = new RecallMeasure();
        double recall = recallMeasure.evaluate(guessLabels, truthLabels);

        PrecisionMeasure precisionMeasure = new PrecisionMeasure();
        double precision = precisionMeasure.evaluate(guessLabels, truthLabels);

        SpecificityMeasure specificityMeasure = new SpecificityMeasure();
        double specificity = specificityMeasure.evaluate(guessLabels, truthLabels);

        System.out.printf("%f, %f, %f, %f, %f, %f, %f, %f, %f\n",
                accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity);

        modshogun.exit_shogun();
    }
}
// In this example a mean squared error (MSE) is being computed
// for the pair of random vectors of length N.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: computes the mean squared error between two random vectors.
 */
public class evaluation_meansquarederror_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Draws two random 1xN matrices, wraps them as regression labels and
     * prints the value returned by MeanSquaredError.evaluate.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int length = 100;
        DoubleMatrix truth = randn(1, length);
        DoubleMatrix guess = randn(1, length);

        RegressionLabels truthLabels = new RegressionLabels(truth);
        RegressionLabels guessLabels = new RegressionLabels(guess);

        MeanSquaredError mseEvaluator = new MeanSquaredError();
        double mse = mseEvaluator.evaluate(guessLabels, truthLabels);
        System.out.println(mse);

        modshogun.exit_shogun();
    }
}
// In this example a multiclass accuracy is being computed for toy data labels
// and toy data labels multiplied by two.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: computes multiclass accuracy between the toy labels and the
 * same labels multiplied by two.
 */
public class evaluation_multiclassaccuracy_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Loads the multiclass labels twice (once scaled) and prints the accuracy.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double factor = 2.0;
        final String labelFile = "../data/label_train_multiclass.dat";

        DoubleMatrix truth = Load.load_labels(labelFile);
        // The "predictions" are a second copy of the labels, scaled by the factor.
        DoubleMatrix guess = Load.load_labels(labelFile).mmul(factor);

        MulticlassLabels truthLabels = new MulticlassLabels(truth);
        MulticlassLabels guessLabels = new MulticlassLabels(guess);

        MulticlassAccuracy accuracyEvaluator = new MulticlassAccuracy();
        double accuracy = accuracyEvaluator.evaluate(guessLabels, truthLabels);
        System.out.println(accuracy);

        modshogun.exit_shogun();
    }
}
// In this example PRC (Precision-Recall curve) is being computed
// for the pair of ground truth toy labels and random labels.
// PRC curve (as matrix) and auPRC (area under PRC) is returned.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: computes the PRC and auPRC of random predictions against
 * two-class ground truth labels.
 */
public class evaluation_prcevaluation_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Evaluates the random predictions and prints get_PRC() and get_auPRC().
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        DoubleMatrix truth = Load.load_labels("../data/label_train_twoclass.dat");
        // One random prediction per ground truth label.
        DoubleMatrix guess = randn(1, truth.getLength());

        BinaryLabels truthLabels = new BinaryLabels(truth);
        BinaryLabels guessLabels = new BinaryLabels(guess);

        PRCEvaluation prc = new PRCEvaluation();
        prc.evaluate(guessLabels, truthLabels);

        System.out.println(prc.get_PRC());
        System.out.println(prc.get_auPRC());

        modshogun.exit_shogun();
    }
}
// In this example ROC (Receiver Operator Characteristic) is being computed
// for the pair of ground truth toy labels and random labels.
// ROC curve (as matrix) and auROC (area under ROC) is returned.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: computes the ROC and auROC of random predictions against
 * two-class ground truth labels.
 */
public class evaluation_rocevaluation_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Evaluates the random predictions and prints get_ROC() and get_auROC().
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        DoubleMatrix truth = Load.load_labels("../data/label_train_twoclass.dat");
        // One random prediction per ground truth label.
        DoubleMatrix guess = randn(1, truth.getLength());

        BinaryLabels truthLabels = new BinaryLabels(truth);
        BinaryLabels guessLabels = new BinaryLabels(guess);

        ROCEvaluation roc = new ROCEvaluation();
        roc.evaluate(guessLabels, truthLabels);

        System.out.println(roc.get_ROC());
        System.out.println(roc.get_auROC());

        modshogun.exit_shogun();
    }
}
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: round-trips matrices through RealFeatures and ByteFeatures and
 * prints the input and output matrices.
 */
public class features_dense_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Loads a real-valued and a byte-valued matrix, wraps each in the matching
     * dense feature class, reads the feature matrix back and prints all four.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        List<DoubleMatrix> matrices = new ArrayList<DoubleMatrix>(4);

        // Real-valued features: input matrix followed by what the features return.
        DoubleMatrix realIn = Load.load_numbers("../data/fm_train_real.dat");
        RealFeatures realFeats = new RealFeatures(realIn);
        DoubleMatrix realOut = realFeats.get_feature_matrix();
        matrices.add(realIn);
        matrices.add(realOut);

        // Byte features: same round trip.
        DoubleMatrix byteIn = Load.load_numbers("../data/fm_train_byte.dat");
        ByteFeatures byteFeats = new ByteFeatures(byteIn);
        DoubleMatrix byteOut = byteFeats.get_feature_matrix();
        matrices.add(byteIn);
        matrices.add(byteOut);

        System.out.println(matrices);

        modshogun.exit_shogun();
    }
}
// Creates features similar to the feature space of the SNP kernel. Useful when
// working with linear methods.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.SNP;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: loads SNP strings from an ASCII file and wraps them in SNPFeatures.
 */
public class features_snp_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Reads "../data/snps.dat" into byte string features and constructs
     * SNPFeatures from them.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        StringByteFeatures byteStrings = new StringByteFeatures(SNP);
        byteStrings.load_ascii_file("../data/snps.dat", false, SNP, SNP);

        SNPFeatures snpFeatures = new SNPFeatures(byteStrings);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to use compressed strings with shogun.
// We currently support reading and writing compressed files using
// LZO, GZIP, BZIP2 and LZMA. Furthermore, we demonstrate how to extract
// compressed streams on-the-fly in order to fit data sets into
// memory that would be too large, otherwise.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import static org.shogun.E_COMPRESSION_TYPE.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: saves string features with save_compressed and loads them back
 * with load_compressed. Only the UNCOMPRESSED round trip is active; the
 * LZO/GZIP/BZIP2/LZMA variants are left commented out.
 */
public class features_string_char_compressed_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Reads this example's own source file as RAWBYTE string features,
     * writes it to "foo_uncompressed.str" and reads that file back.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        StringFileCharFeatures source =
            new StringFileCharFeatures("features_string_char_compressed_modular.java", RAWBYTE);

        // Active round trip: uncompressed save followed by load.
        source.save_compressed("foo_uncompressed.str", UNCOMPRESSED, 1);
        StringCharFeatures loaded = new StringCharFeatures(RAWBYTE);
        loaded.load_compressed("foo_uncompressed.str", true);

        // LZO round trip (disabled).
        //source.save_compressed("foo_lzo.str", LZO, 9);
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_lzo.str", true);

        // GZIP round trip (disabled).
        //source.save_compressed("foo_gzip.str", GZIP, 9);
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_gzip.str", true);

        // BZIP2 round trip (disabled).
        //source.save_compressed("foo_bzip2.str", BZIP2, 9);
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_bzip2.str", true);

        // LZMA round trip (disabled).
        //source.save_compressed("foo_lzma.str", LZMA, 9);
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_lzma.str", true);

        // Decompression via an explicitly applied preprocessor (disabled).
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_lzo.str", false);
        //loaded.add_preprocessor(new DecompressCharString(LZO));
        //loaded.apply_preprocessor();

        // Decompression via on-the-fly preprocessing (disabled).
        loaded = new StringCharFeatures(RAWBYTE);
        //loaded.load_compressed("foo_lzo.str", false);
        //loaded.add_preprocessor(new DecompressCharString(LZO));
        //loaded.enable_on_the_fly_preprocessing();

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to encode ASCII-strings (255 symbols) in shogun.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: creates RAWBYTE string features from a few words and replaces
 * the first feature vector.
 */
public class features_string_char_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds the string features and overwrites vector 0 with the
     * characters of "test" stored as a 1x4 matrix.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        String[] words = { "hey","guys","i","am","a","string"};
        StringCharFeatures charFeatures = new StringCharFeatures(words, RAWBYTE);

        // The char literals are widened to double when stored in the matrix.
        double[][] replacement = {{'t','e','s','t'}};
        charFeatures.set_feature_vector(new DoubleMatrix(replacement), 0);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to load ASCII features from a file into shogun.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: constructs StringFileCharFeatures from a file on disk.
 */
public class features_string_file_char_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Opens this example's own source file as RAWBYTE string features.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final String sourceFile = "features_string_file_char_modular.java";
        StringFileCharFeatures fileFeatures = new StringFileCharFeatures(sourceFile, RAWBYTE);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to load string features from files.
// We cover two cases: First, we show how to obtain StringCharFeatures
// from a directory of text files (particularly useful in computational biology)
// and second, we demonstrate how to load StringCharFeatures from one (multi-line) file.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: fills StringCharFeatures first from a directory and then from
 * a single ASCII file.
 */
public class features_string_file_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Calls load_from_directory(".") and then load() with an AsciiFile
     * on the same feature object.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final String singleFile = "features_string_char_modular.java";
        StringCharFeatures charFeatures = new StringCharFeatures(RAWBYTE);

        // Case 1: load from the current directory.
        charFeatures.load_from_directory(".");

        // Case 2: load from one file.
        AsciiFile asciiFile = new AsciiFile(singleFile);
        charFeatures.load(asciiFile);

        modshogun.exit_shogun();
    }
}
// This creates a HashedWDFeatures object, i.e. an approximation to the Weighted
// Degree kernel feature space via hashes. These features can be particularly fast
// in linear SVM solvers.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWDNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: constructs HashedWDFeatures on top of RAWDNA byte string features.
 */
public class features_string_hashed_wd_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Creates empty RAWDNA string features and wraps them in HashedWDFeatures.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int startOrder = 1;
        final int hashBits = 2;
        // from_order is set to the same value as order.
        final int fromOrder = order;

        StringByteFeatures byteStrings = new StringByteFeatures(RAWDNA);
        HashedWDFeatures hashed =
            new HashedWDFeatures(byteStrings, startOrder, order, fromOrder, hashBits);

        modshogun.exit_shogun();
    }
}
// In this example, we demonstrate how to obtain string features
// by using a sliding window in a memory-efficient way. Instead of copying
// the string for each position of the sliding window, we only store a reference
// with respect to the complete string. This is particularly useful, when working
// with genomic data, where storing all explicitly copied strings in memory
// quickly becomes infeasible. In addition to a sliding window (of a particular
// length) over all position, we also support defining a custom position
// list.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: derives string features from one DNA string with a sliding
 * window and prepares a custom position list.
 */
public class features_string_sliding_window_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Calls obtain_by_sliding_window(5, 1) and fills a DynamicIntArray with
     * four positions; the obtain_by_position_list call is left disabled.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        String[] sequence = {"AAAAAAAAAACCCCCCCCCCGGGGGGGGGGTTTTTTTTTT"};
        StringCharFeatures charFeatures = new StringCharFeatures(sequence, DNA);
        charFeatures.obtain_by_sliding_window(5,1);

        DynamicIntArray positions = new DynamicIntArray();
        for (int start : new int[] {0, 6, 16, 25}) {
            positions.append_element(start);
        }
        //charFeatures.obtain_by_position_list(8,positions);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to encode string
// features efficiently by creating a more compactly encoded
// bit-string from StringCharFeatures.
// For instance, when working with the DNA alphabet {A,T,G,C}
// using 1 char = 1 byte per symbol would be wasteful, as we
// can encode 4 symbols using 2 bits only.
// Here, this is done in chunks of 64 bits (ulong).
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: converts RAWBYTE char string features into StringUlongFeatures
 * and replaces the first feature vector.
 */
public class features_string_ulong_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds char features from three words, derives ulong features via
     * obtain_from_char and overwrites vector 0 with {1,2,3,4,5}.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int start = 0;
        final int order = 2;
        final int gap = 0;

        String[] words = { "hey", "guys", "string"};
        StringCharFeatures charFeatures = new StringCharFeatures(words, RAWBYTE);

        StringUlongFeatures ulongFeatures = new StringUlongFeatures(RAWBYTE);
        ulongFeatures.obtain_from_char(charFeatures, start, order, gap, reverse);

        double[][] replacement = {{1,2,3,4,5}};
        ulongFeatures.set_feature_vector(new DoubleMatrix(replacement), 0);

        modshogun.exit_shogun();
    }
}
// This example demonstrates how to encode string
// features efficiently by creating a more compactly encoded
// bit-string from StringCharFeatures.
// For instance, when working with the DNA alphabet {A,T,G,C}
// using 1 char = 1 byte per symbol would be wasteful, as we
// can encode 4 symbols using 2 bits only.
// Here, this is done in chunks of 16 bits (word).
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.RAWBYTE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: converts RAWBYTE char string features into StringWordFeatures
 * and replaces the first feature vector.
 */
public class features_string_word_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds char features from three words, derives word features via
     * obtain_from_char(cf, 0, 2, 0, false) and overwrites vector 0 with
     * {1,2,3,4,5}.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        String[] words = { "hey", "guys", "string"};
        StringCharFeatures charFeatures = new StringCharFeatures(words, RAWBYTE);

        StringWordFeatures wordFeatures = new StringWordFeatures(RAWBYTE);
        wordFeatures.obtain_from_char(charFeatures, 0, 2, 0, false);

        double[][] replacement = {{1,2,3,4,5}};
        wordFeatures.set_feature_vector(new DoubleMatrix(replacement), 0);

        modshogun.exit_shogun();
    }
}
// In this example the ANOVA kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: computes ANOVA kernel matrices for the real-valued toy data.
 */
public class kernel_anova_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Computes the kernel matrix on train/train and then, after
     * re-initialising the kernel, on train/test.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int cardinality = 2;
        final int cacheSize = 5;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainMatrix);
        RealFeatures testFeats = new RealFeatures(testMatrix);

        ANOVAKernel anova = new ANOVAKernel(trainFeats, trainFeats, cardinality, cacheSize);
        DoubleMatrix kmTrain = anova.get_kernel_matrix();

        // Re-initialise with the test features on the right-hand side.
        anova.init(trainFeats, testFeats);
        DoubleMatrix kmTest = anova.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This example demonstrates the use of the AUC Kernel.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.DoubleMatrix.randn;
/**
 * Example: sets up an AUCKernel on top of a Gaussian sub-kernel and prints
 * its kernel matrix.
 */
public class kernel_auc_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds the Gaussian sub-kernel on the training data, wraps it in an
     * AUCKernel, calls setup_auc_maximization with the two-class labels and
     * prints the resulting kernel matrix.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double gaussianWidth = 1.6;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix labelMatrix = Load.load_labels("../data/label_train_twoclass.dat");

        RealFeatures trainFeats = new RealFeatures(trainMatrix);
        GaussianKernel gaussian = new GaussianKernel(trainFeats, trainFeats, gaussianWidth);
        BinaryLabels trainLabels = new BinaryLabels(labelMatrix);

        AUCKernel aucKernel = new AUCKernel(0, gaussian);
        aucKernel.setup_auc_maximization(trainLabels);

        DoubleMatrix kmTrain = aucKernel.get_kernel_matrix();
        System.out.println(kmTrain.toString());

        modshogun.exit_shogun();
    }
}
// In this example the Cauchy kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: computes Cauchy kernel matrices for the real-valued toy data.
 */
public class kernel_cauchy_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Computes and prints the train/train and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double sigma = 1.0;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainMatrix);
        RealFeatures testFeats = new RealFeatures(testMatrix);

        // The kernel is constructed with a Euclidean distance on the training data.
        EuclideanDistance euclidean = new EuclideanDistance(trainFeats, trainFeats);
        CauchyKernel cauchy = new CauchyKernel(trainFeats, trainFeats, sigma, euclidean);

        DoubleMatrix kmTrain = cauchy.get_kernel_matrix();

        // Re-initialise with the test features on the right-hand side.
        cauchy.init(trainFeats, testFeats);
        DoubleMatrix kmTest = cauchy.get_kernel_matrix();

        System.out.println(kmTrain.toString());
        System.out.println(kmTest.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the chi2-kernel on real data, where
// each column of the matrices corresponds to one training/test example.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: computes chi2 kernel matrices for the real-valued toy data.
 */
public class kernel_chi2_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Computes and prints the train/train and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 1.4;
        final int cacheSize = 10;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainMatrix);
        RealFeatures testFeats = new RealFeatures(testMatrix);

        Chi2Kernel chi2 = new Chi2Kernel(trainFeats, trainFeats, kernelWidth, cacheSize);
        DoubleMatrix kmTrain = chi2.get_kernel_matrix();

        // Re-initialise with the test features on the right-hand side.
        chi2.init(trainFeats, testFeats);
        DoubleMatrix kmTest = chi2.get_kernel_matrix();

        System.out.println(kmTrain.toString());
        System.out.println(kmTest.toString());

        modshogun.exit_shogun();
    }
}
// In this example the circular kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Example: computes circular kernel matrices for the real-valued toy data.
 */
public class kernel_circular_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Computes and prints the train/train and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double sigma = 1.0;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainMatrix);
        RealFeatures testFeats = new RealFeatures(testMatrix);

        // The kernel is constructed with a Euclidean distance on the training data.
        EuclideanDistance euclidean = new EuclideanDistance(trainFeats, trainFeats);
        CircularKernel circular = new CircularKernel(trainFeats, trainFeats, sigma, euclidean);

        DoubleMatrix kmTrain = circular.get_kernel_matrix();

        // Re-initialise with the test features on the right-hand side.
        circular.init(trainFeats, testFeats);
        DoubleMatrix kmTest = circular.get_kernel_matrix();

        System.out.println(kmTrain.toString());
        System.out.println(kmTest.toString());

        modshogun.exit_shogun();
    }
}
// In this example the combined kernel of custom kernel and poly kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Example: trains a LibSVM on a combined kernel made of a custom kernel
 * (a precomputed degree-3 polynomial kernel matrix) and a degree-2
 * polynomial kernel, then applies it with a matching combined kernel
 * built for the test data.
 */
public class kernel_combined_custom_poly_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds the training kernel, trains the SVM, builds the prediction
     * kernel on train/test data, applies the SVM and prints the training
     * kernel matrix.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        double C = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        DoubleMatrix trainlab = Load.load_labels("../data/label_train_twoclass.dat");

        // --- Training: custom kernel (precomputed poly-3) + poly-2 sub-kernel ---
        CombinedKernel kernel = new CombinedKernel();
        CombinedFeatures feats_train = new CombinedFeatures();

        RealFeatures tfeats = new RealFeatures(traindata_real);
        PolyKernel tkernel = new PolyKernel(10,3);
        tkernel.init(tfeats, tfeats);
        DoubleMatrix K = tkernel.get_kernel_matrix();
        kernel.append_kernel(new CustomKernel(K));

        RealFeatures subkfeats_train = new RealFeatures(traindata_real);
        feats_train.append_feature_obj(subkfeats_train);
        PolyKernel subkernel = new PolyKernel(10,2);
        kernel.append_kernel(subkernel);

        kernel.init(feats_train, feats_train);

        BinaryLabels labels = new BinaryLabels(trainlab);
        LibSVM svm = new LibSVM(C, kernel, labels);
        svm.train();

        // --- Prediction: same structure, with test data on the right-hand side ---
        CombinedKernel kernel_pred = new CombinedKernel();
        CombinedFeatures feats_pred = new CombinedFeatures();

        RealFeatures pfeats = new RealFeatures(testdata_real);
        PolyKernel tkernel_pred = new PolyKernel(10,3);
        tkernel_pred.init(tfeats, pfeats);
        // The custom kernel for prediction must come from the train/test kernel
        // just initialised, not from the train/train kernel used for training.
        DoubleMatrix KK = tkernel_pred.get_kernel_matrix();
        kernel_pred.append_kernel(new CustomKernel(KK));

        // The prediction feature set holds the test features.
        RealFeatures subkfeats_test = new RealFeatures(testdata_real);
        feats_pred.append_feature_obj(subkfeats_test);
        PolyKernel subkernel_pred = new PolyKernel(10,2);
        kernel_pred.append_kernel(subkernel_pred);

        kernel_pred.init(feats_train, feats_pred);

        svm.set_kernel(kernel_pred);
        obtain_from_generic(svm.apply());

        DoubleMatrix km_train=kernel.get_kernel_matrix();
        System.out.println(km_train.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of a combined kernel, which is a weighted sum of
// in this case three kernels on real valued data. The sub-kernel weights are all set to 1.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Example: builds a CombinedKernel from a Gaussian kernel on real-valued
 * data and two string kernels on DNA data, and computes its kernel matrices.
 */
public class kernel_combined_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Appends three feature/kernel pairs to the combined features and the
     * combined kernel, then computes the train/train and train/test matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int cardinality = 2;
        final int cacheSize = 10;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        RealFeatures realTrain = new RealFeatures(trainMatrix);
        RealFeatures realTest = new RealFeatures(testMatrix);

        CombinedKernel combined = new CombinedKernel();
        CombinedFeatures combinedTrain = new CombinedFeatures();
        CombinedFeatures combinedTest = new CombinedFeatures();

        // Sub-kernel 1: Gaussian kernel on the real-valued features.
        GaussianKernel gaussian = new GaussianKernel(cacheSize, 1.1);
        combinedTrain.append_feature_obj(realTrain);
        combinedTest.append_feature_obj(realTest);
        combined.append_kernel(gaussian);

        // Sub-kernel 2: fixed-degree string kernel on the DNA strings.
        StringCharFeatures dnaTrain = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures dnaTest = new StringCharFeatures(testDna, DNA);
        final int degree = 3;
        FixedDegreeStringKernel fixedDegree = new FixedDegreeStringKernel(10, degree);
        combinedTrain.append_feature_obj(dnaTrain);
        combinedTest.append_feature_obj(dnaTest);
        combined.append_kernel(fixedDegree);

        // Sub-kernel 3: local alignment string kernel on fresh DNA feature objects.
        dnaTrain = new StringCharFeatures(trainDna, DNA);
        dnaTest = new StringCharFeatures(testDna, DNA);
        LocalAlignmentStringKernel localAlignment = new LocalAlignmentStringKernel(10);
        combinedTrain.append_feature_obj(dnaTrain);
        combinedTest.append_feature_obj(dnaTest);
        combined.append_kernel(localAlignment);

        combined.init(combinedTrain, combinedTrain);
        DoubleMatrix kmTrain = combined.get_kernel_matrix();

        combined.init(combinedTrain, combinedTest);
        DoubleMatrix kmTest = combined.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the CommUlongString-kernel. This kernel
// sums over k-mer matches (k='order'). For efficient computing a preprocessor is used
// that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
// only once.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Example: computes CommUlongStringKernel matrices on DNA strings that are
 * converted to ulong features and run through a SortUlongString preprocessor.
 */
public class kernel_comm_ulong_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Prepares the train and test features, then computes the train/train
     * and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;
        final boolean useSign = false;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Training features: chars -> ulong words -> preprocessor applied.
        StringCharFeatures trainChars = new StringCharFeatures(DNA);
        trainChars.set_features(trainDna);
        StringUlongFeatures trainFeats = new StringUlongFeatures(trainChars.get_alphabet());
        trainFeats.obtain_from_char(trainChars, order - 1, order, gap, reverse);

        SortUlongString sorter = new SortUlongString();
        sorter.init(trainFeats);
        trainFeats.add_preprocessor(sorter);
        trainFeats.apply_preprocessor();

        // Test features: same pipeline; the preprocessor instance is reused and
        // the alphabet is taken from the training char features.
        StringCharFeatures testChars = new StringCharFeatures(DNA);
        testChars.set_features(testDna);
        StringUlongFeatures testFeats = new StringUlongFeatures(trainChars.get_alphabet());
        testFeats.obtain_from_char(testChars, order - 1, order, gap, reverse);
        testFeats.add_preprocessor(sorter);
        testFeats.apply_preprocessor();

        CommUlongStringKernel commKernel = new CommUlongStringKernel(trainFeats, trainFeats, useSign);
        DoubleMatrix kmTrain = commKernel.get_kernel_matrix();

        commKernel.init(trainFeats, testFeats);
        DoubleMatrix kmTest = commKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the CommWordString-kernel (aka
// Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel
// sums over k-mer matches (k='order'). For efficient computing a preprocessor is used
// that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
// only once.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Example: computes CommWordStringKernel matrices on DNA strings that are
 * converted to word features and run through a SortWordString preprocessor.
 */
public class kernel_comm_word_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Prepares the train and test features, then computes the train/train
     * and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;
        final boolean useSign = false;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Training features: chars -> words -> preprocessor applied.
        StringCharFeatures trainChars = new StringCharFeatures(DNA);
        trainChars.set_features(trainDna);
        StringWordFeatures trainFeats = new StringWordFeatures(trainChars.get_alphabet());
        trainFeats.obtain_from_char(trainChars, order - 1, order, gap, reverse);

        SortWordString sorter = new SortWordString();
        sorter.init(trainFeats);
        trainFeats.add_preprocessor(sorter);
        trainFeats.apply_preprocessor();

        // Test features: same pipeline; the preprocessor instance is reused and
        // the alphabet is taken from the training char features.
        StringCharFeatures testChars = new StringCharFeatures(DNA);
        testChars.set_features(testDna);
        StringWordFeatures testFeats = new StringWordFeatures(trainChars.get_alphabet());
        testFeats.obtain_from_char(testChars, order - 1, order, gap, reverse);
        testFeats.add_preprocessor(sorter);
        testFeats.apply_preprocessor();

        CommWordStringKernel commKernel = new CommWordStringKernel(trainFeats, trainFeats, useSign);
        DoubleMatrix kmTrain = commKernel.get_kernel_matrix();

        commKernel.init(trainFeats, testFeats);
        DoubleMatrix kmTest = commKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The constant kernel gives a trivial kernel matrix with all entries set to the same value
// defined by the argument 'c'.
//
import org.shogun.*;
import org.jblas.*;
/**
 * Example: computes ConstKernel matrices on dummy features and prints them.
 */
public class kernel_const_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Builds DummyFeatures of size 10 (train) and 17 (test), then computes
     * and prints the train/train and train/test kernel matrices.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        final double constant = 23;

        DummyFeatures trainFeats = new DummyFeatures(10);
        DummyFeatures testFeats = new DummyFeatures(17);

        ConstKernel constKernel = new ConstKernel(trainFeats, trainFeats, constant);
        DoubleMatrix kmTrain = constKernel.get_kernel_matrix();

        // Re-initialise with the test features on the right-hand side.
        constKernel.init(trainFeats, testFeats);
        DoubleMatrix kmTest = constKernel.get_kernel_matrix();

        System.out.println(kmTrain.toString());
        System.out.println(kmTest.toString());

        modshogun.exit_shogun();
    }
}
// A user defined custom kernel is assigned in this example, for which only the lower triangle
// may be given (set_triangle_kernel_matrix_from_triangle) or
// a full matrix (set_full_kernel_matrix_from_full), or a full matrix which is then internally stored as a
// triangle (set_triangle_kernel_matrix_from_full). Labels for the examples are given, a svm is trained and
// the svm is used to classify the examples.
//
import org.shogun.*;
import org.jblas.*;
/**
 * Fills a CustomKernel three ways (triangle from triangle, triangle from a
 * full matrix, full from a full matrix) and reads the kernel matrix back
 * after each assignment.
 */
public class kernel_custom_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int dim = 7;
        DoubleMatrix data = DoubleMatrix.rand(dim, dim);
        RealFeatures feats = new RealFeatures(data);

        // data + data^T is symmetric.
        DoubleMatrix symdata = data.add(data.transpose());

        // Pack the lower triangle (diagonal included) row by row into a
        // single-row matrix of length dim * (dim + 1) / 2.
        final int triangleSize = (1 + dim) * dim / 2;
        DoubleMatrix lowertriangle = DoubleMatrix.zeros(1, triangleSize);
        int next = 0;
        for (int row = 0; row < dim; row++) {
            for (int col = 0; col <= row; col++) {
                lowertriangle.put(0, next, symdata.get(row, col));
                next++;
            }
        }

        CustomKernel customKernel = new CustomKernel();

        customKernel.set_triangle_kernel_matrix_from_triangle(lowertriangle);
        DoubleMatrix km_triangletriangle = customKernel.get_kernel_matrix();

        customKernel.set_triangle_kernel_matrix_from_full(symdata);
        DoubleMatrix km_fulltriangle = customKernel.get_kernel_matrix();

        customKernel.set_full_kernel_matrix_from_full(data);
        DoubleMatrix km_fullfull = customKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the diag-kernel.
// The diag kernel has all kernel matrix entries but those on
// the main diagonal set to zero.
import org.shogun.*;
import org.jblas.*;
/**
 * Builds a DiagKernel with diagonal value {@code diag} over DummyFeatures and
 * prints the train/train and train/test kernel matrices.
 */
public class kernel_diag_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double diag = 23;
        DummyFeatures feats_train = new DummyFeatures(10);
        DummyFeatures feats_test = new DummyFeatures(17);

        // This example is about the diag kernel, so instantiate DiagKernel
        // (a ConstKernel would fill every entry with the same value).
        DiagKernel kernel = new DiagKernel(feats_train, feats_train, diag);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// With the distance kernel one can use any of the following distance metrics:
// BrayCurtisDistance()
// CanberraMetric()
// CanberraWordDistance()
// ChebyshewMetric()
// ChiSquareDistance()
// CosineDistance()
// Distance()
// EuclideanDistance()
// GeodesicMetric()
// HammingWordDistance()
// JensenMetric()
// ManhattanMetric()
// ManhattanWordDistance()
// MinkowskiMetric()
// RealDistance()
// SimpleDistance()
// SparseDistance()
// SparseEuclidianDistance()
// StringDistance()
// TanimotoDistance()
//
import org.shogun.*;
import org.jblas.*;
/**
 * Wraps a EuclideanDistance in a DistanceKernel and prints the distance
 * matrix for train vs. train and for train vs. test.
 */
public class kernel_distance_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double width = 1.7;
        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance();

        // Build the kernel on (train, train) so the first matrix really is
        // the training matrix; previously it was built on (train, test) and
        // both printed matrices were the same.
        // NOTE(review): relies on DistanceKernel initialising the wrapped
        // distance with the features passed to its constructor - confirm.
        DistanceKernel kernel = new DistanceKernel(feats_train, feats_train, width, distance);
        DoubleMatrix dm_train = distance.get_distance_matrix();

        distance.init(feats_train, feats_test);
        DoubleMatrix dm_test = distance.get_distance_matrix();

        System.out.println(dm_train.toString());
        System.out.println(dm_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the distant segments kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the DistantSegments kernel on DNA toy data for train vs. train
 * and train vs. test.
 */
public class kernel_distantsegments_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int delta = 5;
        final int theta = 5;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        // NOTE(review): the literal 10 is presumably the kernel cache size - confirm.
        DistantSegmentsKernel dsKernel =
                new DistantSegmentsKernel(trainFeats, trainFeats, 10, delta, theta);
        DoubleMatrix km_train = dsKernel.get_kernel_matrix();

        dsKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = dsKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example the exponential kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Computes the exponential kernel (built on a Euclidean distance) on real
 * valued toy data for train vs. train and train vs. test.
 */
public class kernel_exponential_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double tau_coef = 5.0;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainData);
        RealFeatures testFeats = new RealFeatures(testData);

        EuclideanDistance euclid = new EuclideanDistance(trainFeats, trainFeats);

        // NOTE(review): the trailing 10 is presumably the cache size - confirm.
        ExponentialKernel expKernel =
                new ExponentialKernel(trainFeats, trainFeats, tau_coef, euclid, 10);

        expKernel.init(trainFeats, trainFeats);
        DoubleMatrix km_train = expKernel.get_kernel_matrix();

        expKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = expKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The class FKFeatures implements Fisher kernel features obtained from
// two Hidden Markov models.
//
// It was used in
//
// K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
// discriminative kernel from probabilistic models. Neural Computation,
// 14:2397-2414, 2002.
//
// which also has the details.
//
// Note that FK-features are computed on the fly, so to be effective feature
// caching should be enabled.
//
// It inherits its functionality from CSimpleFeatures, which should be
// consulted for further reference.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BaumWelchViterbiType.BW_NORMAL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Trains one HMM on the positively labelled and one on the remaining DNA
 * training sequences, derives FKFeatures from the two models and computes a
 * PolyKernel on those features for train vs. train and train vs. test.
 */
public class kernel_fisher_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int N = 1;
        final int M = 64;
        final double pseudo = 1e-5;
        final int order = 3;
        final int gap = 0;

        String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");
        DoubleMatrix label_train_dna = Load.load_labels("../data/label_train_dna.dat");

        // Split the training sequences by label: label 1 goes to the
        // positive set, everything else to the negative set. Typed lists
        // replace the former raw ArrayLists and element casts.
        List<String> posSequences = new ArrayList<String>();
        List<String> negSequences = new ArrayList<String>();
        for (int i = 0; i < label_train_dna.getColumns(); i++) {
            if (label_train_dna.get(i) == 1) {
                posSequences.add(fm_train_dna[i]);
            } else {
                negSequences.add(fm_train_dna[i]);
            }
        }
        String[] fm_hmm_pos = posSequences.toArray(new String[0]);
        String[] fm_hmm_neg = negSequences.toArray(new String[0]);

        // HMM trained on the positive sequences.
        StringCharFeatures charfeat = new StringCharFeatures(fm_hmm_pos, DNA);
        StringWordFeatures hmm_pos_train = new StringWordFeatures(charfeat.get_alphabet());
        hmm_pos_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);
        HMM pos = new HMM(hmm_pos_train, N, M, pseudo);
        pos.baum_welch_viterbi_train(BW_NORMAL);

        // HMM trained on the negative sequences.
        charfeat = new StringCharFeatures(fm_hmm_neg, DNA);
        StringWordFeatures hmm_neg_train = new StringWordFeatures(charfeat.get_alphabet());
        hmm_neg_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);
        HMM neg = new HMM(hmm_neg_train, N, M, pseudo);
        neg.baum_welch_viterbi_train(BW_NORMAL);

        // Word features for the complete training and test sets.
        charfeat = new StringCharFeatures(fm_train_dna, DNA);
        StringWordFeatures wordfeats_train = new StringWordFeatures(charfeat.get_alphabet());
        wordfeats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        charfeat = new StringCharFeatures(fm_test_dna, DNA);
        StringWordFeatures wordfeats_test = new StringWordFeatures(charfeat.get_alphabet());
        wordfeats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        // Fisher-kernel features on the training observations.
        pos.set_observations(wordfeats_train);
        neg.set_observations(wordfeats_train);
        FKFeatures feats_train = new FKFeatures(10, pos, neg);
        feats_train.set_opt_a(-1);

        PolyKernel kernel = new PolyKernel(feats_train, feats_train, 1, true);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        // Copies of the models get the test observations, so the models
        // backing feats_train keep the training observations.
        HMM pos_clone = new HMM(pos);
        HMM neg_clone = new HMM(neg);
        pos_clone.set_observations(wordfeats_test);
        neg_clone.set_observations(wordfeats_test);
        FKFeatures feats_test = new FKFeatures(10, pos_clone, neg_clone);
        // Reuse the 'a' value obtained on the training features.
        feats_test.set_a(feats_train.get_a());

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The FixedDegree String kernel takes as input two strings of same size and counts the number of matches of length d.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the FixedDegreeString kernel on DNA toy data for train vs. train
 * and train vs. test.
 */
public class kernel_fixed_degree_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int degree = 4;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        FixedDegreeStringKernel fdKernel =
                new FixedDegreeStringKernel(trainFeats, trainFeats, degree);
        DoubleMatrix km_train = fdKernel.get_kernel_matrix();

        fdKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = fdKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the Gaussian kernel matrix on real valued toy data
 * for train vs. train and train vs. test.
 */
public class kernel_gaussian_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double width = 1.3;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainData);
        RealFeatures testFeats = new RealFeatures(testData);

        GaussianKernel gaussKernel = new GaussianKernel(trainFeats, trainFeats, width);
        DoubleMatrix trainMatrix = gaussKernel.get_kernel_matrix();

        gaussKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = gaussKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// An experimental kernel inspired by the WeightedDegreePositionStringKernel and the Gaussian kernel.
// The idea is to shift the dimensions of the input vectors against each other. 'shift_step' is the step
// size of the shifts and max_shift is the maximal shift.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the GaussianShift kernel matrix on real valued toy
 * data for train vs. train and train vs. test.
 */
public class kernel_gaussian_shift_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double width = 1.8;
        final int max_shift = 2;
        final int shift_step = 1;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainData);
        RealFeatures testFeats = new RealFeatures(testData);

        GaussianShiftKernel shiftKernel =
                new GaussianShiftKernel(trainFeats, trainFeats, width, max_shift, shift_step);
        DoubleMatrix trainMatrix = shiftKernel.get_kernel_matrix();

        shiftKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = shiftKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// The HistogramWordString computes the TOP kernel on inhomogeneous Markov Chains.
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BinaryLabels.obtain_from_generic;
/**
 * Trains a PluginEstimate on DNA word features and uses it to build a
 * HistogramWordString kernel, computed for train vs. train and train vs. test.
 */
public class kernel_histogram_word_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int order = 3;
        final int gap = 0;

        String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");

        // 'reverse' is now passed through instead of a hard-coded false, so
        // the declared parameter actually controls the conversion.
        StringCharFeatures charfeat = new StringCharFeatures(fm_train_dna, DNA);
        StringWordFeatures feats_train = new StringWordFeatures(charfeat.get_alphabet());
        feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        charfeat = new StringCharFeatures(fm_test_dna, DNA);
        StringWordFeatures feats_test = new StringWordFeatures(charfeat.get_alphabet());
        feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        BinaryLabels labels = new BinaryLabels(Load.load_labels("../data/label_train_dna.dat"));

        // The estimator is trained first and then handed to the kernel.
        PluginEstimate pie = new PluginEstimate();
        pie.set_labels(labels);
        pie.set_features(feats_train);
        pie.train();

        HistogramWordStringKernel kernel =
                new HistogramWordStringKernel(feats_train, feats_train, pie);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        pie.set_features(feats_test);
        // Apply the estimator to the test features; the labels are not used further.
        obtain_from_generic(pie.apply()).get_labels();
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example the inverse multiquadric kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the inverse multiquadric kernel matrix (built on a
 * Euclidean distance) for train vs. train and train vs. test.
 */
public class kernel_inversemultiquadric_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double shift_coef = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // Construct on (train, train): the former (train, test) made
        // km_train identical to km_test.
        InverseMultiQuadricKernel kernel =
                new InverseMultiQuadricKernel(feats_train, feats_train, shift_coef, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// example on saving a kernel to a file
import org.shogun.*;
import org.jblas.*;
/**
 * Computes a Gaussian kernel for train vs. train and train vs. test, saves
 * each kernel to its own ASCII file and prints both matrices.
 */
public class kernel_io_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double width = 1.2;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        // Training kernel is built on (train, train); it used to be built on
        // (train, test), so the "train" file held the test kernel.
        GaussianKernel kernel = new GaussianKernel(feats_train, feats_train, width);
        DoubleMatrix km_train = kernel.get_kernel_matrix();
        AsciiFile f = new AsciiFile("gaussian_train.ascii", 'w');
        kernel.save(f);

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();
        // Separate file for the test kernel so the training file is not
        // overwritten.
        AsciiFile f_test = new AsciiFile("gaussian_test.ascii", 'w');
        kernel.save(f_test);

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of a linear kernel on raw byte
// data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints a linear kernel with an AvgDiagKernelNormalizer on
 * byte features for train vs. train and train vs. test.
 */
public class kernel_linear_byte_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double scale = 1.2;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_byte.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_byte.dat");
        ByteFeatures trainFeats = new ByteFeatures(trainData);
        ByteFeatures testFeats = new ByteFeatures(testData);

        LinearKernel linKernel = new LinearKernel(trainFeats, testFeats);
        linKernel.set_normalizer(new AvgDiagKernelNormalizer(scale));

        // The kernel is (re-)initialised after the normalizer is set,
        // first on train vs. train ...
        linKernel.init(trainFeats, trainFeats);
        DoubleMatrix trainMatrix = linKernel.get_kernel_matrix();

        // ... then on train vs. test.
        linKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = linKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of a linear kernel on real valued
// data using scaling factor 1.2.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints a linear kernel with an AvgDiagKernelNormalizer on
 * real valued features for train vs. train and train vs. test.
 */
public class kernel_linear_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double scale = 1.2;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainData);
        RealFeatures testFeats = new RealFeatures(testData);

        LinearKernel linKernel = new LinearKernel(trainFeats, testFeats);
        linKernel.set_normalizer(new AvgDiagKernelNormalizer(scale));

        // The kernel is (re-)initialised after the normalizer is set,
        // first on train vs. train ...
        linKernel.init(trainFeats, trainFeats);
        DoubleMatrix trainMatrix = linKernel.get_kernel_matrix();

        // ... then on train vs. test.
        linKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = linKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of a linear kernel on string data. The
// strings are all of the same length and consist of the characters 'ACGT' corresponding
// to the DNA-alphabet. Each column of the matrices of type char corresponds to
// one training/test example.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the LinearString kernel on DNA toy data for train vs. train and
 * train vs. test.
 */
public class kernel_linear_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        LinearStringKernel linKernel = new LinearStringKernel(trainFeats, trainFeats);
        DoubleMatrix km_train = linKernel.get_kernel_matrix();

        linKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = linKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of a linear kernel on word (2byte)
// data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints a linear kernel with an AvgDiagKernelNormalizer on
 * word features for train vs. train and train vs. test.
 */
public class kernel_linear_word_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double scale = 1.2;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_word.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_word.dat");
        WordFeatures trainFeats = new WordFeatures(trainData);
        WordFeatures testFeats = new WordFeatures(testData);

        LinearKernel linKernel = new LinearKernel(trainFeats, testFeats);
        linKernel.set_normalizer(new AvgDiagKernelNormalizer(scale));

        // The kernel is (re-)initialised after the normalizer is set,
        // first on train vs. train ...
        linKernel.init(trainFeats, trainFeats);
        DoubleMatrix trainMatrix = linKernel.get_kernel_matrix();

        // ... then on train vs. test.
        linKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = linKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the local alignment kernel on
// DNA sequences, where each column of the matrices of type char corresponds to
// one training/test example.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the LocalAlignmentString kernel on DNA toy data for train vs.
 * train and train vs. test.
 */
public class kernel_local_alignment_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        LocalAlignmentStringKernel laKernel =
                new LocalAlignmentStringKernel(trainFeats, trainFeats);
        DoubleMatrix km_train = laKernel.get_kernel_matrix();

        laKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = laKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The LocalityImprovedString kernel is inspired by the polynomial kernel.
// By comparing neighboring characters it puts emphasis on local features.
//
// It can be defined as
// K({\bf x},{\bf x'})=\left(\sum_{i=0}^{T-1}\left(\sum_{j=-l}^{+l}w_jI_{i+j}({\bf x},{\bf x'})\right)^{d_1}\right)^{d_2},
// where
// I_i({\bf x},{\bf x'})=1
// if x_i=x'_i and 0 otherwise.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the LocalityImprovedString kernel on DNA toy data for train vs.
 * train and train vs. test.
 */
public class kernel_locality_improved_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int length = 5;
        final int inner_degree = 5;
        final int outer_degree = 7;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        LocalityImprovedStringKernel liKernel = new LocalityImprovedStringKernel(
                trainFeats, trainFeats, length, inner_degree, outer_degree);
        DoubleMatrix km_train = liKernel.get_kernel_matrix();

        liKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = liKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example the log kernel (logarithm of the distance powered by degree plus one) is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the log kernel matrix (built on a Euclidean distance)
 * for train vs. train and train vs. test.
 */
public class kernel_log_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double degree = 2.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // This is the log-kernel example, so use LogKernel (a WaveKernel was
        // instantiated here before). Construct on (train, train) so that
        // km_train is the training matrix rather than a copy of km_test.
        LogKernel kernel = new LogKernel(feats_train, feats_train, degree, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the match word string kernel is being computed for toy data
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the MatchWordString kernel with an AvgDiagKernelNormalizer on DNA
 * word features for train vs. train and train vs. test.
 */
public class kernel_match_word_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int degree = 20;
        final double scale = 1.4;
        final int size_cache = 10;
        final int order = 3;
        final int gap = 0;
        final boolean reverse = false;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Char features -> word features, training side.
        StringCharFeatures trainChars = new StringCharFeatures(trainDna, DNA);
        StringWordFeatures trainWords = new StringWordFeatures(DNA);
        trainWords.obtain_from_char(trainChars, order - 1, order, gap, reverse);

        // Char features -> word features, test side.
        StringCharFeatures testChars = new StringCharFeatures(testDna, DNA);
        StringWordFeatures testWords = new StringWordFeatures(DNA);
        testWords.obtain_from_char(testChars, order - 1, order, gap, reverse);

        // The kernel is created without features; the normalizer is set
        // before the first init.
        MatchWordStringKernel matchKernel = new MatchWordStringKernel(size_cache, degree);
        matchKernel.set_normalizer(new AvgDiagKernelNormalizer(scale));

        matchKernel.init(trainWords, trainWords);
        DoubleMatrix km_train = matchKernel.get_kernel_matrix();

        matchKernel.init(trainWords, testWords);
        DoubleMatrix km_test = matchKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example the multiquadric kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the multiquadric kernel matrix (built on a Euclidean
 * distance) for train vs. train and train vs. test.
 */
public class kernel_multiquadric_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double shift_coef = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // Construct on (train, train): the former (train, test) made
        // km_train identical to km_test.
        MultiquadricKernel kernel =
                new MultiquadricKernel(feats_train, feats_train, shift_coef, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// This is an example initializing the oligo string kernel which takes distances
// between matching oligos (k-mers) into account via a gaussian. Variable 'k' defines the length
// of the oligo and variable 'w' the width of the gaussian. The oligo string kernel is
// implemented for the DNA-alphabet 'ACGT'.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Computes the OligoString kernel on DNA toy data for train vs. train and
 * train vs. test.
 */
public class kernel_oligo_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int size_cache = 3;
        final int k = 1;
        final double width = 10;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        // The kernel is created without features and initialised explicitly.
        OligoStringKernel oligoKernel = new OligoStringKernel(size_cache, k, width);

        oligoKernel.init(trainFeats, trainFeats);
        DoubleMatrix km_train = oligoKernel.get_kernel_matrix();

        oligoKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = oligoKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example the poly match string kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Computes the PolyMatchString kernel on DNA toy data for train vs. train
 * and train vs. test.
 */
public class kernel_poly_match_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int degree = 3;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        StringCharFeatures trainFeats = new StringCharFeatures(trainDna, DNA);
        StringCharFeatures testFeats = new StringCharFeatures(testDna, DNA);

        PolyMatchStringKernel pmKernel =
                new PolyMatchStringKernel(trainFeats, trainFeats, degree, true);
        DoubleMatrix km_train = pmKernel.get_kernel_matrix();

        pmKernel.init(trainFeats, testFeats);
        DoubleMatrix km_test = pmKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This is an example for the initialization of the PolyMatchString kernel on string data.
// The PolyMatchString kernel sums over the matches of two strings of the same length and
// takes the sum to the power of 'degree'. The strings consist of the characters 'ACGT' corresponding
// to the DNA-alphabet. Each column of the matrices of type char corresponds to
// one training/test example.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Computes the PolyMatchWordString kernel on DNA word features for train vs.
 * train and train vs. test.
 */
public class kernel_poly_match_word_string_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int order = 3;
        final int gap = 0;
        final int degree = 2;

        String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");

        // 'reverse' is now passed through instead of a hard-coded false, so
        // the declared parameter actually controls the conversion.
        StringCharFeatures charfeat = new StringCharFeatures(fm_train_dna, DNA);
        StringWordFeatures feats_train = new StringWordFeatures(charfeat.get_alphabet());
        feats_train.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        charfeat = new StringCharFeatures(fm_test_dna, DNA);
        StringWordFeatures feats_test = new StringWordFeatures(charfeat.get_alphabet());
        feats_test.obtain_from_char(charfeat, order - 1, order, gap, reverse);

        PolyMatchWordStringKernel kernel =
                new PolyMatchWordStringKernel(feats_train, feats_train, degree, true);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This example initializes the polynomial kernel with real data.
// If variable 'inhomogene' is 'True' +1 is added to the scalar product
// before taking it to the power of 'degree'. If 'use_normalization' is
// set to 'true' then kernel matrix will be normalized by the square roots
// of the diagonal entries.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the polynomial kernel matrix on real valued toy data
 * for train vs. train and train vs. test.
 */
public class kernel_poly_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int degree = 4;

        DoubleMatrix trainData = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testData = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures trainFeats = new RealFeatures(trainData);
        RealFeatures testFeats = new RealFeatures(testData);

        PolyKernel polyKernel = new PolyKernel(trainFeats, trainFeats, degree, false);
        DoubleMatrix trainMatrix = polyKernel.get_kernel_matrix();

        polyKernel.init(trainFeats, testFeats);
        DoubleMatrix testMatrix = polyKernel.get_kernel_matrix();

        System.out.println(trainMatrix.toString());
        System.out.println(testMatrix.toString());

        modshogun.exit_shogun();
    }
}
// In this example the power kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
/**
 * Computes and prints the power kernel matrix (built on a Euclidean
 * distance) for train vs. train and train vs. test.
 */
public class kernel_power_modular {
    static {
        System.loadLibrary("modshogun");
    }

    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double degree = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // Construct on (train, train): the former (train, test) made
        // km_train identical to km_test.
        PowerKernel kernel = new PowerKernel(feats_train, feats_train, degree, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the rational quadratic kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// Rational quadratic kernel example: computes the kernel matrix on
// (train, train) and then on (train, test) for the toy data and prints both.
public class kernel_rationalquadratic_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        double shift_coef = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix; it is switched to (train, test) further down.
        RationalQuadraticKernel kernel =
            new RationalQuadraticKernel(feats_train, feats_train, shift_coef, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// The SalzbergWordString kernel implements the Salzberg kernel.
//
// It is described in
//
// Engineering Support Vector Machine Kernels That Recognize Translation Initiation Sites
// A. Zien, G.Raetsch, S. Mika, B. Schoelkopf, T. Lengauer, K.-R. Mueller
//
import org.shogun.*;
import org.jblas.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BinaryLabels.obtain_from_generic;
// Salzberg kernel example: converts DNA strings to word features, trains a
// PluginEstimate on the training set and uses it to build a
// SalzbergWordStringKernel, evaluated on (train, train) and (train, test).
public class kernel_salzberg_word_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final boolean reverse = false;
        final int order = 3;
        final int gap = 0;

        final String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Training strings -> word features.
        StringCharFeatures charFeatures = new StringCharFeatures(trainDna, DNA);
        final StringWordFeatures trainWords = new StringWordFeatures(charFeatures.get_alphabet());
        trainWords.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

        // Test strings -> word features.
        charFeatures = new StringCharFeatures(testDna, DNA);
        final StringWordFeatures testWords = new StringWordFeatures(charFeatures.get_alphabet());
        testWords.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

        final BinaryLabels trainLabels =
            new BinaryLabels(Load.load_labels("../data/label_train_dna.dat"));

        // The plug-in estimate is trained first and then handed to the kernel.
        final PluginEstimate estimate = new PluginEstimate();
        estimate.set_labels(trainLabels);
        estimate.set_features(trainWords);
        estimate.train();

        final SalzbergWordStringKernel salzberg =
            new SalzbergWordStringKernel(trainWords, trainWords, estimate, trainLabels);
        final DoubleMatrix trainKernelMatrix = salzberg.get_kernel_matrix();

        salzberg.init(trainWords, testWords);
        estimate.set_features(testWords);
        // The predicted labels are computed but not stored.
        obtain_from_generic(estimate.apply()).get_labels();
        final DoubleMatrix testKernelMatrix = salzberg.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The standard Sigmoid kernel computed on dense real valued features.
import org.shogun.*;
import org.jblas.*;
// Sigmoid kernel example: computes the kernel matrix on (train, train) and
// then on (train, test) for the toy data and prints both.
public class kernel_sigmoid_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        int size_cache = 10;
        double gamma = 1.2;
        double coef0 = 1.3;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix; it is switched to (train, test) further down.
        SigmoidKernel kernel = new SigmoidKernel(feats_train, feats_train, size_cache, gamma, coef0);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// The SimpleLocalityImprovedString kernel is a `simplified' and better performing version of the Locality improved kernel.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
// SimpleLocalityImprovedString kernel example: builds the kernel on DNA char
// features and prints the (train, train) and (train, test) kernel matrices.
public class kernel_simple_locality_improved_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final int length = 5;
        final int innerDegree = 3;
        final int outerDegree = 2;

        final String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        final StringCharFeatures trainFeatures = new StringCharFeatures(trainDna, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testDna, DNA);

        // Kernel on (train, train).
        final SimpleLocalityImprovedStringKernel sliKernel =
            new SimpleLocalityImprovedStringKernel(
                trainFeatures, trainFeatures, length, innerDegree, outerDegree);
        final DoubleMatrix trainKernelMatrix = sliKernel.get_kernel_matrix();

        // Same kernel object, re-initialised on (train, test).
        sliKernel.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = sliKernel.get_kernel_matrix();

        System.out.println(trainKernelMatrix.toString());
        System.out.println(testKernelMatrix.toString());

        modshogun.exit_shogun();
    }
}
// In this example the spherical kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// Spherical kernel example: computes the SphericalKernel matrix on
// (train, train) and then on (train, test) for the toy data and prints both.
public class kernel_spherical_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        double sigma = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // This example is about the spherical kernel, so SphericalKernel is
        // instantiated here. It starts out on (train, train) so that km_train
        // is the train-vs-train matrix.
        SphericalKernel kernel = new SphericalKernel(feats_train, feats_train, sigma, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the spline kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// Spline kernel example: computes the kernel matrix on (train, train) and
// then on (train, test) for the toy data and prints both.
public class kernel_spline_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix; it is switched to (train, test) further down.
        SplineKernel kernel = new SplineKernel(feats_train, feats_train);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// The class TOPFeatures implements TOP kernel features obtained from
// two Hidden Markov models.
//
// It was used in
//
// K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
// discriminative kernel from probabilistic models. Neural Computation,
// 14:2397-2414, 2002.
//
// which also has the details.
//
// Note that TOP-features are computed on the fly, so to be effective feature
// caching should be enabled.
//
// It inherits its functionality from CSimpleFeatures, which should be
// consulted for further reference.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import static org.shogun.BaumWelchViterbiType.BW_NORMAL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
// TOP kernel example: trains one HMM on the positively labelled and one on the
// remaining DNA training strings, derives TOPFeatures from the two models and
// evaluates a PolyKernel on them for (train, train) and (train, test).
public class kernel_top_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        boolean reverse = false;
        int N = 1;
        int M = 64;
        double pseudo = 1e-5;
        int order = 3;
        int gap = 0;

        String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");
        DoubleMatrix label_train_dna = Load.load_labels("../data/label_train_dna.dat");

        // Split the training strings by label: label 1 goes to the positive
        // set, everything else to the negative set.
        List<String> fm_hmm_pos_builder = new ArrayList<String>();
        List<String> fm_hmm_neg_builder = new ArrayList<String>();
        for (int i = 0; i < label_train_dna.getColumns(); i++) {
            if (label_train_dna.get(i) == 1) {
                fm_hmm_pos_builder.add(fm_train_dna[i]);
            } else {
                fm_hmm_neg_builder.add(fm_train_dna[i]);
            }
        }
        String[] fm_hmm_pos = fm_hmm_pos_builder.toArray(new String[0]);
        String[] fm_hmm_neg = fm_hmm_neg_builder.toArray(new String[0]);

        // One HMM per class, each trained on its own subset.
        StringWordFeatures hmm_pos_train = toWordFeatures(fm_hmm_pos, order, gap, reverse);
        HMM pos = new HMM(hmm_pos_train, N, M, pseudo);
        pos.baum_welch_viterbi_train(BW_NORMAL);

        StringWordFeatures hmm_neg_train = toWordFeatures(fm_hmm_neg, order, gap, reverse);
        HMM neg = new HMM(hmm_neg_train, N, M, pseudo);
        neg.baum_welch_viterbi_train(BW_NORMAL);

        StringWordFeatures wordfeats_train = toWordFeatures(fm_train_dna, order, gap, reverse);
        StringWordFeatures wordfeats_test = toWordFeatures(fm_test_dna, order, gap, reverse);

        // TOP features of the full training set.
        pos.set_observations(wordfeats_train);
        neg.set_observations(wordfeats_train);
        TOPFeatures feats_train = new TOPFeatures(10, pos, neg, false, false);
        PolyKernel kernel = new PolyKernel(feats_train, feats_train, 1, true);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        // Copies of the HMMs receive the test observations, so the models
        // behind feats_train keep their training observations.
        HMM pos_clone = new HMM(pos);
        HMM neg_clone = new HMM(neg);
        pos_clone.set_observations(wordfeats_test);
        neg_clone.set_observations(wordfeats_test);
        TOPFeatures feats_test = new TOPFeatures(10, pos_clone, neg_clone, false, false);

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }

    // Wraps the DNA strings in StringCharFeatures and converts them to word features.
    private static StringWordFeatures toWordFeatures(String[] dna, int order, int gap, boolean reverse) {
        StringCharFeatures charfeat = new StringCharFeatures(dna, DNA);
        StringWordFeatures wordfeat = new StringWordFeatures(charfeat.get_alphabet());
        wordfeat.obtain_from_char(charfeat, order - 1, order, gap, reverse);
        return wordfeat;
    }
}
// In this example the t-Student's kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// t-Student kernel example: computes the kernel matrix on (train, train) and
// then on (train, test) for the toy data and prints both.
public class kernel_tstudent_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        double degree = 2.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix.
        TStudentKernel kernel = new TStudentKernel(feats_train, feats_train, degree, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        // Re-initialise the kernel itself (not just the distance) on
        // (train, test) before fetching the test matrix.
        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the wave kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// Wave kernel example: computes the kernel matrix on (train, train) and then
// on (train, test) for the toy data and prints both.
public class kernel_wave_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        double theta = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        EuclideanDistance distance = new EuclideanDistance(feats_train, feats_train);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix; it is switched to (train, test) further down.
        WaveKernel kernel = new WaveKernel(feats_train, feats_train, theta, distance);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// In this example the wavelet kernel is being computed for toy data.
import org.shogun.*;
import org.jblas.*;
// Wavelet kernel example: computes the kernel matrix on (train, train) and
// then on (train, test) for the toy data and prints both.
public class kernel_wavelet_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        double dilation = 1.5;
        double translation = 1.0;

        DoubleMatrix traindata_real = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testdata_real = Load.load_numbers("../data/fm_test_real.dat");
        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);

        // The kernel starts out on (train, train) so that km_train is the
        // train-vs-train matrix; it is switched to (train, test) further down.
        WaveletKernel kernel = new WaveletKernel(feats_train, feats_train, 10, dilation, translation);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        System.out.println(km_train.toString());
        System.out.println(km_test.toString());

        modshogun.exit_shogun();
    }
}
// The WeightedCommWordString kernel may be used to compute the weighted
// spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
// length is weighted by some coefficient \f$\beta_k\f$) from strings that have
// been mapped into unsigned 16bit integers.
//
// These 16bit integers correspond to k-mers. To be applicable in this kernel they
// need to be sorted (e.g. via the SortWordString pre-processor).
//
// It basically uses the algorithm in the unix "comm" command (hence the name)
// to compute:
//
// k({\bf x},{\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
//
// where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
// \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
// vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
//
// Note that this representation is especially tuned to small alphabets
// (like the 2-bit alphabet DNA), for which it enables spectrum kernels
// of order 8.
//
// For this kernel the linadd speedups are quite efficiently implemented using
// direct maps.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
// WeightedCommWordString kernel example: maps DNA strings to word features,
// sorts them with SortWordString and evaluates a WeightedCommWordStringKernel
// on (train, train) and then on (train, test).
public class kernel_weighted_comm_word_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        boolean reverse = false;
        boolean use_sign = false;
        int order = 3;
        int gap = 0;

        String[] fm_train_dna = Load.load_dna("../data/fm_train_dna.dat");
        String[] fm_test_dna = Load.load_dna("../data/fm_test_dna.dat");

        // Training strings -> word features.
        StringCharFeatures charfeat = new StringCharFeatures(fm_train_dna, DNA);
        StringWordFeatures feats_train = new StringWordFeatures(charfeat.get_alphabet());
        feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse);

        // Test strings -> word features.
        charfeat = new StringCharFeatures(fm_test_dna, DNA);
        StringWordFeatures feats_test = new StringWordFeatures(charfeat.get_alphabet());
        feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse);

        // The same sorting preprocessor is applied to both feature sets
        // before the kernel is built.
        SortWordString preproc = new SortWordString();
        preproc.init(feats_train);
        feats_train.add_preprocessor(preproc);
        feats_train.apply_preprocessor();
        feats_test.add_preprocessor(preproc);
        feats_test.apply_preprocessor();

        WeightedCommWordStringKernel kernel =
            new WeightedCommWordStringKernel(feats_train, feats_train, use_sign);
        DoubleMatrix km_train = kernel.get_kernel_matrix();

        kernel.init(feats_train, feats_test);
        DoubleMatrix km_test = kernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
//
// The WD-shift kernel of order d compares two sequences X and
// Y of length L by summing all contributions of k-mer matches of
// lengths k in 1...d, weighted by coefficients beta_k
// allowing for a positional tolerance of up to shift s.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
// Weighted Degree Position String kernel example: builds the kernel on DNA
// char features, sets all-ones shifts and position weights, and computes the
// (train, train) and (train, test) kernel matrices.
public class kernel_weighted_degree_position_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final int wdDegree = 3;

        final String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        final StringCharFeatures trainFeatures = new StringCharFeatures(trainDna, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testDna, DNA);

        final WeightedDegreePositionStringKernel wdsKernel =
            new WeightedDegreePositionStringKernel(trainFeatures, trainFeatures, wdDegree);

        // Shifts and position weights are 1 x L row vectors of ones, where L
        // is the length of the first training string.
        wdsKernel.set_shifts(DoubleMatrix.ones(1, (trainDna[0]).length()));
        wdsKernel.set_position_weights(DoubleMatrix.ones(1, (trainDna[0]).length()));

        final DoubleMatrix trainKernelMatrix = wdsKernel.get_kernel_matrix();

        wdsKernel.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = wdsKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// This example shows how to create a Weighted Degree String Kernel from data
// and how to compute the kernel matrix from the resulting object.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
// Weighted Degree String kernel example: builds the kernel on DNA char
// features, sets linearly decreasing weights that sum to one, and computes
// the (train, train) and (train, test) kernel matrices.
public class kernel_weighted_degree_string_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final int degree = 3;

        final String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        final String[] testDna = Load.load_dna("../data/fm_test_dna.dat");
        final StringCharFeatures trainFeatures = new StringCharFeatures(trainDna, DNA);
        final StringCharFeatures testFeatures = new StringCharFeatures(testDna, DNA);

        final WeightedDegreeStringKernel wdKernel =
            new WeightedDegreeStringKernel(trainFeatures, trainFeatures, degree);

        // weight[k] = (degree - k) / (1 + 2 + ... + degree)
        final double normaliser = degree * (degree + 1)/2;
        final double[] weightValues = new double[degree];
        for (int k = 0; k < degree; k++) {
            weightValues[k] = (degree - k)/normaliser;
        }
        final DoubleMatrix wdWeights = new DoubleMatrix(1, degree, weightValues);
        wdKernel.set_wd_weights(wdWeights);

        final DoubleMatrix trainKernelMatrix = wdKernel.get_kernel_matrix();

        wdKernel.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = wdKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example we show how to perform Multiple Kernel Learning (MKL)
// with the modular interface. First, we create a number of base kernels.
// These kernels can capture different views of the same features, or actually
// consider entirely different features associated with the same example
// (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample).
// The base kernels are then subsequently added to a CombinedKernel, which
// contains a weight for each kernel and encapsulates the base kernels
// from the training procedure. When the CombinedKernel between two examples is
// evaluated it computes the corresponding linear combination of kernels according to their weights.
// We then show how to create an MKLClassifier that trains an SVM and learns the optimal
// weighting of kernels (w.r.t. a given norm q) at the same time.
// Finally, the example shows how to classify with a trained MKLClassifier.
//
import org.shogun.*;
import org.jblas.*;
// Binary MKL example: precomputes polynomial kernel matrices, combines a
// CustomKernel with a PolyKernel in a CombinedKernel, trains an
// MKLClassification machine and applies it with a second combined kernel
// initialised on (train, test).
public class mkl_binclass_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        // These four parameters are declared but not used by the code below.
        double width = 2.1;
        double epsilon = 1e-5;
        double C = 1.0;
        int mkl_norm = 2;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_twoclass.dat");

        // Precompute the polynomial kernel matrices that back the custom kernels.
        final RealFeatures polyTrainFeatures = new RealFeatures(trainMatrix);
        final PolyKernel precomputeKernel = new PolyKernel(10,3);
        precomputeKernel.init(polyTrainFeatures, polyTrainFeatures);
        final DoubleMatrix precomputedTrain = precomputeKernel.get_kernel_matrix();

        final RealFeatures polyTestFeatures = new RealFeatures(testMatrix);
        precomputeKernel.init(polyTrainFeatures, polyTestFeatures);
        final DoubleMatrix precomputedTest = precomputeKernel.get_kernel_matrix();

        // Training: combined kernel over the combined training features.
        final CombinedFeatures combinedTrain = new CombinedFeatures();
        combinedTrain.append_feature_obj(new RealFeatures(trainMatrix));

        final CombinedKernel trainKernel = new CombinedKernel();
        trainKernel.append_kernel(new CustomKernel(precomputedTrain));
        trainKernel.append_kernel(new PolyKernel(10,2));
        trainKernel.init(combinedTrain, combinedTrain);

        final BinaryLabels trainLabels = new BinaryLabels(trainLabelValues);

        final MKLClassification classifier = new MKLClassification();
        classifier.set_mkl_norm(1);
        classifier.set_kernel(trainKernel);
        classifier.set_labels(trainLabels);
        classifier.train();

        // Prediction: a second combined kernel initialised on (train, test).
        final CombinedFeatures combinedTest = new CombinedFeatures();
        combinedTest.append_feature_obj(new RealFeatures(testMatrix));

        final CombinedKernel predictKernel = new CombinedKernel();
        predictKernel.append_kernel(new CustomKernel(precomputedTest));
        predictKernel.append_kernel(new PolyKernel(10, 2));
        predictKernel.init(combinedTrain, combinedTest);

        classifier.set_kernel(predictKernel);
        classifier.apply();

        modshogun.exit_shogun();
    }
}
// In this example we show how to perform Multiple Kernel Learning (MKL)
// with the modular interface for multi-class classification.
// First, we create a number of base kernels and features.
// These kernels can capture different views of the same features, or actually
// consider entirely different features associated with the same example
// (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample).
// The base kernels are then subsequently added to a CombinedKernel, which
// contains a weight for each kernel and encapsulates the base kernels
// from the training procedure. When the CombinedKernel between two examples is
// evaluated it computes the corresponding linear combination of kernels according to their weights.
// We then show how to create an MKLMultiClass classifier that trains an SVM and learns the optimal
// weighting of kernels (w.r.t. a given norm q) at the same time. The main difference to the binary
// classification version of MKL is that we can use more than two values as labels, when training
// the classifier.
// Finally, the example shows how to classify with a trained MKLMultiClass classifier.
//
import org.shogun.*;
import org.jblas.*;
import static org.shogun.MulticlassLabels.obtain_from_generic;
// Multiclass MKL example: combines a Gaussian, a linear and a polynomial
// sub-kernel (each with its own copy of the features) in a CombinedKernel,
// trains an MKLMulticlass machine and applies it to the test features.
public class mkl_multiclass_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double width = 2.1;
        final double epsilon = 1e-5;
        final double C = 1.0;
        final int mklNorm = 2;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final DoubleMatrix trainLabelValues = Load.load_labels("../data/label_train_multiclass.dat");

        final CombinedKernel combinedKernel = new CombinedKernel();
        final CombinedFeatures combinedTrain = new CombinedFeatures();
        final CombinedFeatures combinedTest = new CombinedFeatures();

        // Sub-kernel 1: Gaussian.
        final RealFeatures gaussTrain = new RealFeatures(trainMatrix);
        final RealFeatures gaussTest = new RealFeatures(testMatrix);
        final GaussianKernel gaussKernel = new GaussianKernel(10, width);
        combinedTrain.append_feature_obj(gaussTrain);
        combinedTest.append_feature_obj(gaussTest);
        combinedKernel.append_kernel(gaussKernel);

        // Sub-kernel 2: linear.
        final RealFeatures linearTrain = new RealFeatures(trainMatrix);
        final RealFeatures linearTest = new RealFeatures(testMatrix);
        final LinearKernel linearKernel = new LinearKernel();
        combinedTrain.append_feature_obj(linearTrain);
        combinedTest.append_feature_obj(linearTest);
        combinedKernel.append_kernel(linearKernel);

        // Sub-kernel 3: polynomial.
        final RealFeatures polyTrain = new RealFeatures(trainMatrix);
        final RealFeatures polyTest = new RealFeatures(testMatrix);
        final PolyKernel polyKernel = new PolyKernel(10, 2);
        combinedTrain.append_feature_obj(polyTrain);
        combinedTest.append_feature_obj(polyTest);
        combinedKernel.append_kernel(polyKernel);

        combinedKernel.init(combinedTrain, combinedTrain);

        final MulticlassLabels trainLabels = new MulticlassLabels(trainLabelValues);

        final MKLMulticlass classifier = new MKLMulticlass(C, combinedKernel, trainLabels);
        classifier.set_epsilon(epsilon);
        classifier.set_mkl_epsilon(epsilon);
        classifier.set_mkl_norm(mklNorm);
        classifier.train();

        // Switch the kernel to (train, test) before predicting.
        combinedKernel.init(combinedTrain, combinedTest);
        final DoubleMatrix predictedLabels = obtain_from_generic(classifier.apply()).get_labels();

        modshogun.exit_shogun();
    }
}
// In this example toy data is being processed using the kernel PCA algorithm
// as described in
//
// Schölkopf, B., Smola, A. J., & Muller, K. R. (1999).
// Kernel Principal Component Analysis.
// Advances in kernel methods support vector learning, 1327(3), 327-352. MIT Press.
// Retrieved from http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.32.8744i
//
// A gaussian kernel is used for the processing.
import org.shogun.*;
import org.jblas.*;
// Kernel PCA example: builds a Gaussian kernel on the toy data, initialises a
// KernelPCA preprocessor with it and applies it to the feature matrix.
public class preprocessor_kernelpca_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 2.0;
        // Declared but not used by the code below.
        final double threshold = 0.05;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);

        final GaussianKernel gaussKernel = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);

        final KernelPCA kernelPca = new KernelPCA(gaussKernel);
        kernelPca.init(trainFeatures);
        kernelPca.apply_to_feature_matrix(trainFeatures);

        modshogun.exit_shogun();
    }
}
// In this example a kernel matrix is computed for a given real-valued data set.
// The kernel used is the Chi2 kernel which operates on real-valued vectors. It
// computes the chi-squared distance between sets of histograms. It is a very
// useful distance in image recognition (used to detect objects). The preprocessor
// LogPlusOne adds one to a dense real-valued vector and takes the logarithm of
// each component of it. It is most useful in situations where the inputs are
// counts: When one compares differences of small counts any difference may matter
// a lot, while small differences in large counts don't. This is what this log
// transformation controls for.
import org.shogun.*;
import org.jblas.*;
// LogPlusOne example: applies the LogPlusOne preprocessor to the train and
// test features, then prints the Chi2 kernel matrices for (train, train) and
// (train, test).
public class preprocessor_logplusone_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 1.4;
        final int cacheSize = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // The preprocessor is initialised on the training features and then
        // attached to, and applied on, both feature sets.
        final LogPlusOne logPlusOne = new LogPlusOne();
        logPlusOne.init(trainFeatures);
        trainFeatures.add_preprocessor(logPlusOne);
        trainFeatures.apply_preprocessor();
        testFeatures.add_preprocessor(logPlusOne);
        testFeatures.apply_preprocessor();

        final Chi2Kernel chi2 = new Chi2Kernel(trainFeatures, trainFeatures, kernelWidth, cacheSize);
        final DoubleMatrix trainKernelMatrix = chi2.get_kernel_matrix();

        chi2.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = chi2.get_kernel_matrix();

        System.out.println(trainKernelMatrix.toString());
        System.out.println(testKernelMatrix.toString());

        modshogun.exit_shogun();
    }
}
// In this example a kernel matrix is computed for a given real-valued data set.
// The kernel used is the Chi2 kernel which operates on real-valued vectors. It
// computes the chi-squared distance between sets of histograms. It is a very
// useful distance in image recognition (used to detect objects). The preprocessor
// NormOne, normalizes vectors to have norm 1.
import org.shogun.*;
import org.jblas.*;
// NormOne example: applies the NormOne preprocessor to the train and test
// features, then prints the Chi2 kernel matrices for (train, train) and
// (train, test).
public class preprocessor_normone_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 1.4;
        final int cacheSize = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // The preprocessor is initialised on the training features and then
        // attached to, and applied on, both feature sets.
        final NormOne normOne = new NormOne();
        normOne.init(trainFeatures);
        trainFeatures.add_preprocessor(normOne);
        trainFeatures.apply_preprocessor();
        testFeatures.add_preprocessor(normOne);
        testFeatures.apply_preprocessor();

        final Chi2Kernel chi2 = new Chi2Kernel(trainFeatures, trainFeatures, kernelWidth, cacheSize);
        final DoubleMatrix trainKernelMatrix = chi2.get_kernel_matrix();

        chi2.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = chi2.get_kernel_matrix();

        System.out.println(trainKernelMatrix.toString());
        System.out.println(testKernelMatrix.toString());

        modshogun.exit_shogun();
    }
}
// In this example toy data is being processed using the
// Principal Component Analysis.
import org.shogun.*;
import org.jblas.*;
// PCA example: initialises a PCA preprocessor on the toy training features
// and applies it to their feature matrix.
public class preprocessor_pca_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        // These two parameters are declared but not used by the code below.
        final double width = 1.4;
        final int size_cache = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);

        final PCA pca = new PCA();
        pca.init(trainFeatures);
        pca.apply_to_feature_matrix(trainFeatures);

        modshogun.exit_shogun();
    }
}
// In this example a kernel matrix is computed for a given real-valued data set.
// The kernel used is the Chi2 kernel which operates on real-valued vectors. It
// computes the chi-squared distance between sets of histograms. It is a very
// useful distance in image recognition (used to detect objects). The preprocessor
// PruneVarSubMean subtracts the mean from each feature and removes features that
// have zero variance.
import org.shogun.*;
import org.jblas.*;
// PruneVarSubMean example: applies the PruneVarSubMean preprocessor to the
// train and test features, then prints the Chi2 kernel matrices for
// (train, train) and (train, test).
public class preprocessor_prunevarsubmean_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 1.4;
        final int cacheSize = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // The preprocessor is initialised on the training features and then
        // attached to, and applied on, both feature sets.
        final PruneVarSubMean pruneVarSubMean = new PruneVarSubMean();
        pruneVarSubMean.init(trainFeatures);
        trainFeatures.add_preprocessor(pruneVarSubMean);
        trainFeatures.apply_preprocessor();
        testFeatures.add_preprocessor(pruneVarSubMean);
        testFeatures.apply_preprocessor();

        final Chi2Kernel chi2 = new Chi2Kernel(trainFeatures, trainFeatures, kernelWidth, cacheSize);
        final DoubleMatrix trainKernelMatrix = chi2.get_kernel_matrix();

        chi2.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = chi2.get_kernel_matrix();

        System.out.println(trainKernelMatrix.toString());
        System.out.println(testKernelMatrix.toString());

        modshogun.exit_shogun();
    }
}
import org.shogun.*;
import org.jblas.*;
// RandomFourierGaussPreproc example: applies the RandomFourierGaussPreproc
// preprocessor to the train and test features, then prints the Chi2 kernel
// matrices for (train, train) and (train, test).
public class preprocessor_randomfouriergausspreproc_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    public static void main(String[] args) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 1.4;
        final int cacheSize = 10;

        final DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        final DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        final RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        final RealFeatures testFeatures = new RealFeatures(testMatrix);

        // The preprocessor is initialised on the training features and then
        // attached to, and applied on, both feature sets.
        final RandomFourierGaussPreproc randomFourier = new RandomFourierGaussPreproc();
        randomFourier.init(trainFeatures);
        trainFeatures.add_preprocessor(randomFourier);
        trainFeatures.apply_preprocessor();
        testFeatures.add_preprocessor(randomFourier);
        testFeatures.apply_preprocessor();

        final Chi2Kernel chi2 = new Chi2Kernel(trainFeatures, trainFeatures, kernelWidth, cacheSize);
        final DoubleMatrix trainKernelMatrix = chi2.get_kernel_matrix();

        chi2.init(trainFeatures, testFeatures);
        final DoubleMatrix testKernelMatrix = chi2.get_kernel_matrix();

        System.out.println(trainKernelMatrix.toString());
        System.out.println(testKernelMatrix.toString());

        modshogun.exit_shogun();
    }
}
// In this example a kernel matrix is computed for a given string data set. The
// CommUlongString kernel is used to compute the spectrum kernel from strings that
// have been mapped into unsigned 64bit integers. These 64bit integers correspond
// to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
// This is done using the SortUlongString preprocessor, which sorts the individual
// strings in ascending order. The kernel function basically uses the algorithm in
// the unix "comm" command (hence the name). Note that this representation enables
// spectrum kernels of order 8 for 8bit alphabets (like binaries) and order 32 for
// 2-bit alphabets like DNA. For this kernel the linadd speedups are implemented
// (though there is room for improvement here when a whole set of sequences is
// ADDed) using sorted lists.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Converts DNA strings into StringUlongFeatures, sorts them with the
 * SortUlongString preprocessor and computes CommUlongStringKernel matrices
 * for (train, train) and (train, test).
 */
public class preprocessor_sortulongstring_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        // Arguments forwarded to obtain_from_char().
        final boolean reverse = false;
        final int order = 3;
        final int gap = 0;
        final int start = order - 1;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Training strings: char features first, then the ulong mapping.
        StringCharFeatures trainChars = new StringCharFeatures(trainDna, DNA);
        StringUlongFeatures trainFeatures = new StringUlongFeatures(trainChars.get_alphabet());
        trainFeatures.obtain_from_char(trainChars, start, order, gap, reverse);

        // Test strings go through the identical conversion.
        StringCharFeatures testChars = new StringCharFeatures(testDna, DNA);
        StringUlongFeatures testFeatures = new StringUlongFeatures(testChars.get_alphabet());
        testFeatures.obtain_from_char(testChars, start, order, gap, reverse);

        // One preprocessor instance, initialised on the training features,
        // is attached to and applied on both feature objects.
        SortUlongString sorter = new SortUlongString();
        sorter.init(trainFeatures);

        trainFeatures.add_preprocessor(sorter);
        trainFeatures.apply_preprocessor();

        testFeatures.add_preprocessor(sorter);
        testFeatures.apply_preprocessor();

        CommUlongStringKernel commKernel =
            new CommUlongStringKernel(trainFeatures, trainFeatures, false);
        DoubleMatrix trainKernelMatrix = commKernel.get_kernel_matrix();

        // Re-initialise the same kernel object for (train, test).
        commKernel.init(trainFeatures, testFeatures);
        DoubleMatrix testKernelMatrix = commKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example a kernel matrix is computed for a given string data set. The
// CommWordString kernel is used to compute the spectrum kernel from strings that
// have been mapped into unsigned 16bit integers. These 16bit integers correspond
// to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
// This is done using the SortWordString preprocessor, which sorts the individual
// strings in ascending order. The kernel function basically uses the algorithm in
// the unix "comm" command (hence the name). Note that this representation is
// especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it
// enables spectrum kernels of order up to 8. For this kernel the linadd speedups
// are quite efficiently implemented using direct maps.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Converts DNA strings into StringWordFeatures, sorts them with the
 * SortWordString preprocessor and computes CommWordStringKernel matrices
 * for (train, train) and (train, test).
 */
public class preprocessor_sortwordstring_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        // Arguments forwarded to obtain_from_char().
        final boolean reverse = false;
        final int order = 3;
        final int gap = 0;
        final int start = order - 1;

        String[] trainDna = Load.load_dna("../data/fm_train_dna.dat");
        String[] testDna = Load.load_dna("../data/fm_test_dna.dat");

        // Training strings: char features first, then the word mapping.
        StringCharFeatures trainChars = new StringCharFeatures(trainDna, DNA);
        StringWordFeatures trainFeatures = new StringWordFeatures(trainChars.get_alphabet());
        trainFeatures.obtain_from_char(trainChars, start, order, gap, reverse);

        // Test strings go through the identical conversion.
        StringCharFeatures testChars = new StringCharFeatures(testDna, DNA);
        StringWordFeatures testFeatures = new StringWordFeatures(testChars.get_alphabet());
        testFeatures.obtain_from_char(testChars, start, order, gap, reverse);

        // One preprocessor instance, initialised on the training features,
        // is attached to and applied on both feature objects.
        SortWordString sorter = new SortWordString();
        sorter.init(trainFeatures);

        trainFeatures.add_preprocessor(sorter);
        trainFeatures.apply_preprocessor();

        testFeatures.add_preprocessor(sorter);
        testFeatures.apply_preprocessor();

        CommWordStringKernel commKernel =
            new CommWordStringKernel(trainFeatures, trainFeatures, false);
        DoubleMatrix trainKernelMatrix = commKernel.get_kernel_matrix();

        // Re-initialise the same kernel object for (train, test).
        commKernel.init(trainFeatures, testFeatures);
        DoubleMatrix testKernelMatrix = commKernel.get_kernel_matrix();

        modshogun.exit_shogun();
    }
}
// In this example a kernelized version of ridge regression (KRR) is trained on a
// real-valued data set. The KRR is trained with regularization parameter tau=1e-6
// and a gaussian kernel with width=0.8. The labels of both the train and the test
// data can be fetched via obtain_from_generic(krr.apply()).get_labels().
import org.shogun.*;
import org.jblas.*;
import static org.shogun.RegressionLabels.obtain_from_generic;
/**
 * Trains a KernelRidgeRegression machine with a Gaussian kernel on the toy
 * data set and prints the labels returned by apply().
 */
public class regression_krr_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 0.8;
        final double regularisation = 1e-6;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        DoubleMatrix trainTargets = Load.load_labels("../data/label_train_twoclass.dat");

        RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        RealFeatures testFeatures = new RealFeatures(testMatrix);

        GaussianKernel gaussian = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        RegressionLabels targets = new RegressionLabels(trainTargets);

        KernelRidgeRegression ridge = new KernelRidgeRegression(regularisation, gaussian, targets);
        ridge.train(trainFeatures);

        // The kernel is re-initialised with the test features on the
        // right-hand side before apply() is called.
        gaussian.init(trainFeatures, testFeatures);
        DoubleMatrix predicted = obtain_from_generic(ridge.apply()).get_labels();

        String rendered = predicted.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a support vector regression algorithm is trained on a
// real-valued toy data set. The underlying library used for the SVR training is
// LIBSVM. The SVR is trained with regularization parameter C=1 and a gaussian
// kernel with width=0.8. The labels of both the train and the test data are
// fetched via obtain_from_generic(svr.apply()).get_labels().
//
// For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ .
import org.shogun.*;
import org.jblas.*;
import static org.shogun.RegressionLabels.obtain_from_generic;
/**
 * Trains a LibSVR support vector regression machine with a Gaussian kernel on
 * the toy data set and prints the labels returned by apply().
 */
public class regression_libsvr_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 0.8;
        final int cost = 1;
        final double solverEpsilon = 1e-5;
        final double tubeEpsilon = 1e-2;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        DoubleMatrix trainTargets = Load.load_labels("../data/label_train_twoclass.dat");

        RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        RealFeatures testFeatures = new RealFeatures(testMatrix);

        GaussianKernel gaussian = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        RegressionLabels targets = new RegressionLabels(trainTargets);

        // The tube epsilon goes into the constructor; the other epsilon is
        // set afterwards through set_epsilon().
        LibSVR regressor = new LibSVR(cost, tubeEpsilon, gaussian, targets);
        regressor.set_epsilon(solverEpsilon);
        regressor.train();

        // The kernel is re-initialised with the test features on the
        // right-hand side before apply() is called.
        gaussian.init(trainFeatures, testFeatures);
        DoubleMatrix predicted = obtain_from_generic(regressor.apply()).get_labels();

        String rendered = predicted.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example a support vector regression algorithm is trained on a
// real-valued toy data set. The underlying library used for the SVR training is
// SVM^light. The SVR is trained with regularization parameter C=1 and a gaussian
// kernel with width=0.8. The labels of both the train and the test data are
// fetched via obtain_from_generic(svr.apply()).get_labels().
//
// For more details on the SVM^light see
// T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
// Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
import org.shogun.*;
import org.jblas.*;
import static org.shogun.RegressionLabels.obtain_from_generic;
/**
 * Trains an SVRLight support vector regression machine with a Gaussian kernel
 * on the toy data set and prints the labels returned by apply().
 */
public class regression_svrlight_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final double kernelWidth = 0.8;
        final int cost = 1;
        final double solverEpsilon = 1e-5;
        final double tubeEpsilon = 1e-2;
        // Only referenced by the disabled set_num_threads() call further down.
        final int threadCount = 3;

        DoubleMatrix trainMatrix = Load.load_numbers("../data/fm_train_real.dat");
        DoubleMatrix testMatrix = Load.load_numbers("../data/fm_test_real.dat");
        DoubleMatrix trainTargets = Load.load_labels("../data/label_train_twoclass.dat");

        RealFeatures trainFeatures = new RealFeatures(trainMatrix);
        RealFeatures testFeatures = new RealFeatures(testMatrix);

        GaussianKernel gaussian = new GaussianKernel(trainFeatures, trainFeatures, kernelWidth);
        RegressionLabels targets = new RegressionLabels(trainTargets);

        // Here the solver epsilon goes into the constructor and the tube
        // epsilon is set afterwards.
        SVRLight regressor = new SVRLight(cost, solverEpsilon, gaussian, targets);
        regressor.set_tube_epsilon(tubeEpsilon);
        //regressor.parallel.set_num_threads(threadCount);
        regressor.train();

        // The kernel is re-initialised with the test features on the
        // right-hand side before apply() is called.
        gaussian.init(trainFeatures, testFeatures);
        DoubleMatrix predicted = obtain_from_generic(regressor.apply()).get_labels();

        String rendered = predicted.toString();
        System.out.println(rendered);

        modshogun.exit_shogun();
    }
}
// In this example serialization of SVM (Support Vector Machine) is shown
import org.shogun.*;
import org.jblas.*;
import static org.jblas.MatrixFunctions.signum;
import static org.jblas.DoubleMatrix.concatHorizontally;
import static org.jblas.DoubleMatrix.zeros;
import static org.jblas.DoubleMatrix.ones;
import static org.jblas.DoubleMatrix.randn;
import java.util.ArrayList;
import java.io.*;
/**
 * Builds a random two-cluster data set, trains a GMNPSVM on it with two
 * preprocessors attached to the features, and opens a SerializableAsciiFile
 * for writing. The actual save call is disabled.
 */
public class serialization_complex_example {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int pointsPerCluster = 1000;
        final double shift = 1.0;
        final double kernelWidth = 2.1;
        final double cost = 1.0;

        // Two 2 x pointsPerCluster random clouds, shifted by -shift and
        // +shift, placed side by side.
        DoubleMatrix offsets = ones(2, pointsPerCluster).mmul(shift);
        DoubleMatrix lowerCloud = randn(2, pointsPerCluster).sub(offsets);
        DoubleMatrix upperCloud = randn(2, pointsPerCluster).add(offsets);
        DoubleMatrix trainMatrix = concatHorizontally(lowerCloud, upperCloud);

        // Label 0 for the first cloud, label 1 for the second.
        DoubleMatrix zeroRow = zeros(1, pointsPerCluster);
        DoubleMatrix oneRow = ones(1, pointsPerCluster);
        DoubleMatrix trainLabels = concatHorizontally(zeroRow, oneRow);
        DoubleMatrix testLabels = concatHorizontally(zeroRow, oneRow);

        RealFeatures features = new RealFeatures(trainMatrix);
        GaussianKernel gaussian = new GaussianKernel(features, features, kernelWidth);
        MulticlassLabels labels = new MulticlassLabels(trainLabels);
        GMNPSVM machine = new GMNPSVM(cost, gaussian, labels);

        // Preprocessors are attached after the kernel and machine have been
        // constructed, and before training.
        features.add_preprocessor(new NormOne());
        features.add_preprocessor(new LogPlusOne());
        features.set_preprocessed(1);
        machine.train(features);

        SerializableAsciiFile asciiFile = new SerializableAsciiFile("blaah.asc", 'w');
        //machine.save_serializable(asciiFile);

        modshogun.exit_shogun();
    }
}
// This example shows how to use boost serialization (only available if the compile flag was enabled)
// to serialize/deserialize an SVMLight object. Note that this code is in alpha state.
import org.shogun.*;
import org.jblas.*;
import static org.jblas.MatrixFunctions.signum;
import static org.jblas.DoubleMatrix.concatHorizontally;
import static org.jblas.DoubleMatrix.ones;
import static org.jblas.DoubleMatrix.randn;
import java.util.ArrayList;
import java.io.*;
/**
 * Trains an SVMLight classifier on a random two-cluster data set and
 * round-trips the trained machine through Java object serialization
 * ({@link #save} followed by {@link #load}).
 */
public class serialization_svmlight_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Closes {@code stream} if it is non-null. A failure to close is printed
     * and not propagated, matching the reporting style of save() and load().
     *
     * @param stream stream to close, may be {@code null}
     */
    private static void closeQuietly(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Serializes {@code obj} to the file {@code fname}. Any failure is
     * printed to stderr; the method itself never throws.
     *
     * @param fname path of the file to write
     * @param obj   object to serialize
     */
    public static void save(String fname, Object obj) {
        FileOutputStream file = null;
        try {
            file = new FileOutputStream(fname);
            ObjectOutputStream out = new ObjectOutputStream(file);
            out.writeObject(obj);
            // Closing the object stream flushes it; a failure here is still
            // reported by the catch block below.
            out.close();
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
        finally {
            // Releases the file handle when the ObjectOutputStream
            // constructor or writeObject() threw before out.close() ran.
            closeQuietly(file);
        }
    }

    /**
     * Deserializes a single object from the file {@code fname}.
     *
     * @param fname path of the file to read
     * @return the object read, or {@code null} if reading failed (the
     *         failure is printed to stderr)
     */
    public static Object load(String fname) {
        Object r = null;
        FileInputStream file = null;
        try {
            file = new FileInputStream(fname);
            ObjectInputStream in = new ObjectInputStream(file);
            r = in.readObject();
            in.close();
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
        finally {
            // Releases the file handle when the ObjectInputStream
            // constructor or readObject() threw before in.close() ran.
            closeQuietly(file);
        }
        return r;
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String argv[]) {
        modshogun.init_shogun_with_defaults();
        int num = 1000;
        double dist = 1.0;
        double width = 2.1;
        double C = 1.0;

        // Training data: two 2 x num random clouds shifted by -dist and +dist.
        DoubleMatrix offs=ones(2, num).mmul(dist);
        DoubleMatrix x = randn(2, num).sub(offs);
        DoubleMatrix y = randn(2, num).add(offs);
        DoubleMatrix traindata_real = concatHorizontally(x, y);

        // Test data generated the same way.
        DoubleMatrix m = randn(2, num).sub(offs);
        DoubleMatrix n = randn(2, num).add(offs);
        DoubleMatrix testdata_real = concatHorizontally(m, n);

        // Labels: -1 for the first cloud, +1 for the second.
        DoubleMatrix o = ones(1,num);
        DoubleMatrix trainlab = concatHorizontally(o.neg(), o);
        DoubleMatrix testlab = concatHorizontally(o.neg(), o);

        RealFeatures feats_train = new RealFeatures(traindata_real);
        RealFeatures feats_test = new RealFeatures(testdata_real);
        GaussianKernel kernel = new GaussianKernel(feats_train, feats_train, width);
        BinaryLabels labels = new BinaryLabels(trainlab);
        SVMLight svm = new SVMLight(C, kernel, labels);
        svm.train();

        // Write the trained machine to disk and read it back.
        String fname = "tmp.dat";
        save(fname, svm);
        SVMLight svm_readed = (SVMLight)load(fname);
        modshogun.exit_shogun();
    }
}
import org.shogun.*;
import org.jblas.*;
import static org.shogun.EAlphabet.DNA;
/**
 * Repeatedly builds word features from a fixed set of DNA-like strings,
 * sorts them and computes a CommWordStringKernel matrix. The class name
 * suggests the repetition is meant to expose memory leaks.
 */
public class tests_check_commwordkernel_memleak_modular {
    static {
        // Load the native modshogun library when this class is initialised.
        System.loadLibrary("modshogun");
    }

    /**
     * Concatenates {@code num} copies of {@code toRepeat}.
     *
     * @param toRepeat string to repeat
     * @param num      number of copies; values below one give an empty string
     * @return the repeated string
     */
    public static String repeat(String toRepeat, int num) {
        StringBuilder buffer = new StringBuilder(toRepeat.length() * num);
        for (int left = num; left > 0; left--) {
            buffer.append(toRepeat);
        }
        return buffer.toString();
    }

    /**
     * Builds the 141-element sequence list used for both classes: entries
     * 60..81 repeat "TTGT", every other entry repeats "ACGT".
     *
     * @param copies how often the four-letter pattern is repeated per entry
     * @return freshly allocated array of 141 strings
     */
    private static String[] buildSequences(int copies) {
        String[] sequences = new String[141];
        for (int idx = 0; idx < sequences.length; idx++) {
            boolean inMiddleRun = idx >= 60 && idx < 82;
            sequences[idx] = repeat(inMiddleRun ? "TTGT" : "ACGT", copies);
        }
        return sequences;
    }

    /**
     * Example entry point.
     *
     * @param argv command line arguments (not used)
     */
    public static void main(String[] argv) {
        modshogun.init_shogun_with_defaults();

        final int num = 10;
        final int order = 7;
        final int gap = 0;
        final boolean reverse = false;
        final int rounds = 10;

        // Both classes are given the same sequence layout.
        String[] POS = buildSequences(num);
        String[] NEG = buildSequences(num);

        // POSNEG is POS followed by NEG.
        String[] POSNEG = new String[POS.length + NEG.length];
        System.arraycopy(POS, 0, POSNEG, 0, POS.length);
        System.arraycopy(NEG, 0, POSNEG, POS.length, NEG.length);

        // Every round creates all shogun objects afresh.
        for (int round = 0; round < rounds; round++) {
            Alphabet alphabet = new Alphabet(DNA);
            StringCharFeatures charFeatures = new StringCharFeatures(alphabet);
            charFeatures.set_features(POSNEG);

            StringWordFeatures wordFeatures = new StringWordFeatures(charFeatures.get_alphabet());
            wordFeatures.obtain_from_char(charFeatures, order - 1, order, gap, reverse);

            SortWordString sorter = new SortWordString();
            sorter.init(wordFeatures);
            wordFeatures.add_preprocessor(sorter);
            wordFeatures.apply_preprocessor();

            CommWordStringKernel spectrum = new CommWordStringKernel(10, false);
            spectrum.set_normalizer(new IdentityKernelNormalizer());
            spectrum.init(wordFeatures, wordFeatures);
            DoubleMatrix kernelMatrix = spectrum.get_kernel_matrix();
        }

        modshogun.exit_shogun();
    }
}