This page lists ready to run shogun examples for the Python Modular interface.

To run the examples issue

python name_of_example.py

Classifier

../examples/documented/python_modular/classifier_averaged_perceptron_modular.py

# In this example the Averaged Perceptron used to classify toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,1.,1000,1],[traindat,testdat,label_traindat,1.,1000,1]]

def classifier_averaged_perceptron_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,learn_rate=1.,max_iter=1000,num_threads=1):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import AveragedPerceptron

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=BinaryLabels(label_train_twoclass)

	perceptron=AveragedPerceptron(feats_train, labels)
	perceptron.set_learn_rate(learn_rate)
	perceptron.set_max_iter(max_iter)
	# only guaranteed to converge for separable data
	perceptron.train()

	perceptron.set_features(feats_test)
	out_labels = perceptron.apply().get_labels()
	#print(out_labels)
	return perceptron, out_labels

if __name__=='__main__':
	print('AveragedPerceptron')
	classifier_averaged_perceptron_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_conjugateindex_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]]

def classifier_conjugateindex_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import ConjugateIndex

	feats_train = RealFeatures(fm_train_real)
	feats_test = RealFeatures(fm_test_real)

	labels = MulticlassLabels(label_train_multiclass)

	ci = ConjugateIndex(feats_train, labels)
	ci.train()

	res = ci.apply(feats_test).get_labels()
	return ci, res

if __name__=='__main__':
	print('ConjugateIndex')
	classifier_conjugateindex_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_custom_kernel_modular.py

# This example shows how to use a custom defined kernel function for training a
# two class Support Vector Machine (SVM) classifier on a randomly generated
# examples. The SVM regularization constant is set to C=1. 

#!/usr/bin/env python
parameter_list = [[1,7],[2,8]]

def classifier_custom_kernel_modular (C=1,dim=7):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Kernel import CustomKernel
	from shogun.Classifier import LibSVM

	from numpy import diag,ones,sign
	from numpy.random import rand,seed

	seed((C,dim))

	lab=sign(2*rand(dim) - 1)
	data=rand(dim, dim)
	symdata=data*data.T + diag(ones(dim))
    
	kernel=CustomKernel()
	kernel.set_full_kernel_matrix_from_full(data)
	labels=BinaryLabels(lab)
	svm=LibSVM(C, kernel, labels)
	svm.train()
	predictions =svm.apply() 
	out=svm.apply().get_labels()
	return svm,out

if __name__=='__main__':
	print('custom_kernel')
	classifier_custom_kernel_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_domainadaptationsvm_modular.py

# In this example we demonstrate how to use SVMs in a domain adaptation
# scenario. Here, we assume that we have two problem domains, one with
# an abundance of training data (source domain) and one with only a few
# training examples (target domain). These domains are assumed to be
# different but related enough to transfer information between them.
# Thus, we first train an SVM on the source domain and then subsequently
# pass this previously trained SVM object to the DASVM, that we train
# on the target domain. The DASVM internally computes a custom linear term
# (for the underlying quadratic program of the dual formulation of the SVM)
# based on the support vectors of the source SVM and the training examples
# of the target SVM. Finally, it can be used for prediction just as any other
# SVM object.
# 

#!/usr/bin/env python
import numpy

from shogun.Features import StringCharFeatures, BinaryLabels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight, DomainAdaptationSVM, MSG_DEBUG

traindna = ['CGCACGTACGTAGCTCGAT',
		      'CGACGTAGTCGTAGTCGTA',
		      'CGACGGGGGGGGGGTCGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGACCACAGTTATATAGTA',
		      'CGACGTAGTCGTAGTCGTA',
		      'CGACGTAGTTTTTTTCGTA',
		      'CGACGTAGTCGTAGCCCCA',
		      'CAAAAAAAAAAAAAAAATA',
		      'CGACGGGGGGGGGGGCGTA']
label_traindna = numpy.array(5*[-1.0] + 5*[1.0])
testdna = ['AGCACGTACGTAGCTCGAT',
		      'AGACGTAGTCGTAGTCGTA',
		      'CAACGGGGGGGGGGTCGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGAACACAGTTATATAGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGACGTGGGGTTTTTCGTA',
		      'CGACGTAGTCCCAGCCCCA',
		      'CAAAAAAAAAAAACCAATA',
		      'CGACGGCCGGGGGGGCGTA']
label_testdna = numpy.array(5*[-1.0] + 5*[1.0])


traindna2 = ['AGACAGTCAGTCGATAGCT',
		      'AGCAGTCGTAGTCGTAGTC',
		      'AGCAGGGGGGGGGGTAGTC',
		      'AGCAATCGTAGTCGTAGTC',
		      'AGCAACACGTTCTCTCGTC',
		      'AGCAGTCGTAGTCGTAGTC',
		      'AGCAGTCGTTTTTTTAGTC',
		      'AGCAGTCGTAGTCGAAAAC',
		      'ACCCCCCCCCCCCCCCCTC',
		      'AGCAGGGGGGGGGGGAGTC']
label_traindna2 = numpy.array(5*[-1.0] + 5*[1.0])
testdna2 = ['CGACAGTCAGTCGATAGCT',
		      'CGCAGTCGTAGTCGTAGTC',
		      'ACCAGGGGGGGGGGTAGTC',
		      'AGCAATCGTAGTCGTAGTC',
		      'AGCCACACGTTCTCTCGTC',
		      'AGCAATCGTAGTCGTAGTC',
		      'AGCAGTGGGGTTTTTAGTC',
		      'AGCAGTCGTAAACGAAAAC',
		      'ACCCCCCCCCCCCAACCTC',
		      'AGCAGGAAGGGGGGGAGTC']
label_testdna2 = numpy.array(5*[-1.0] + 5*[1.0])

parameter_list = [[traindna,testdna,label_traindna,label_testdna,traindna2,label_traindna2, \
                       testdna2,label_testdna2,1,3],[traindna,testdna,label_traindna,label_testdna,traindna2,label_traindna2, \
                       testdna2,label_testdna2,2,5]] 

def classifier_domainadaptationsvm_modular (fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna, \
                                               label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \
                                               label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3):



    
	feats_train = StringCharFeatures(fm_train_dna, DNA)
	feats_test = StringCharFeatures(fm_test_dna, DNA)
	kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
	labels = BinaryLabels(label_train_dna)
	svm = SVMLight(C, kernel, labels)
	svm.train()
	#svm.io.set_loglevel(MSG_DEBUG)
    
	#####################################
		
	#print("obtaining DA SVM from previously trained SVM")

	feats_train2 = StringCharFeatures(fm_train_dna, DNA)
	feats_test2 = StringCharFeatures(fm_test_dna, DNA)
	kernel2 = WeightedDegreeStringKernel(feats_train, feats_train, degree)
	labels2 = BinaryLabels(label_train_dna)

	# we regularize against the previously obtained solution
	dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
	dasvm.train()

	out = dasvm.apply_binary(feats_test2)

	return out #,dasvm TODO

if __name__=='__main__':
	print('SVMLight')
	classifier_domainadaptationsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_featureblock_logistic_regression.py

#!/usr/bin/env python
from numpy import array,hstack
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def classifier_featureblock_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import BinaryLabels, RealFeatures, IndexBlock, IndexBlockGroup, FeatureBlockLogisticRegression

	features = RealFeatures(hstack((traindat,traindat)))
	labels = BinaryLabels(hstack((label_train,label_train)))

	n_features = features.get_num_features()
	block_one = IndexBlock(0,n_features/2)
	block_two = IndexBlock(n_features/2,n_features)
	block_group = IndexBlockGroup()
	block_group.add_block(block_one)
	block_group.add_block(block_two)

	mtlr = FeatureBlockLogisticRegression(0.1,features,labels,block_group)
	mtlr.set_regularization(1) # use regularization ratio
	mtlr.set_tolerance(1e-2) # use 1e-2 tolerance
	mtlr.train()
	out = mtlr.apply().get_labels()

	return out

if __name__=='__main__':
	print('FeatureBlockLogisticRegression')
	classifier_featureblock_logistic_regression(*parameter_list[0])

../examples/documented/python_modular/classifier_gaussiannaivebayes_modular.py

# In this example the Gaussian Naive Bayes algorithm used to classify
# toy data

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def classifier_gaussiannaivebayes_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import GaussianNaiveBayes

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	labels=MulticlassLabels(label_train_multiclass)

	gnb=GaussianNaiveBayes(feats_train, labels)
	gnb_train = gnb.train()
	output=gnb.apply(feats_test).get_labels()
	return gnb, gnb_train, output

if __name__=='__main__':
	print('GaussianNaiveBayes')
	classifier_gaussiannaivebayes_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_gmnpsvm_modular.py

# In this example a multi-class support vector machine is trained on a toy data
# set and the trained classifier is then used to predict labels of test
# examples. The training algorithm is based on BSVM formulation (L2-soft margin
# and the bias added to the objective function) which is solved by the Improved
# Mitchell-Demyanov-Malozemov algorithm. The training algorithm uses the Gaussian
# kernel of width 2.1 and the regularization constant C=1. The solver stops if the
# relative duality gap falls below 1e-5. 
# 
# For more details on the used SVM solver see 
#  V.Franc: Optimization Algorithms for Kernel Methods. Research report.
#  CTU-CMP-2005-22. CTU FEL Prague. 2005.
#  ftp://cmp.felk.cvut.cz/pub/cmp/articles/franc/Franc-PhD.pdf .
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_gmnpsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):

	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import GMNPSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	svm=GMNPSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train(feats_train)
	kernel.init(feats_train, feats_test)
	out=svm.apply(feats_test).get_labels()
	return out,kernel
if __name__=='__main__':
	print('GMNPSVM')
	classifier_gmnpsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_gpbtsvm_modular.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is then used to predict labels of test
# examples. As training algorithm Gradient Projection Decomposition Technique
# (GPDT) is used with SVM regularization parameter C=1 and a Gaussian
# kernel of width 2.1. The solver returns an epsilon-precise (epsilon=1e-5) solution. 
# 
# For more details on GPDT solver see http://dm.unife.it/gpdt . 
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_gpbtsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,width=2.1,C=1,epsilon=1e-5):


	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import GPBTSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)
	labels=BinaryLabels(label_train_twoclass)

	svm=GPBTSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()


if __name__=='__main__':
	print('GPBTSVM')
	classifier_gpbtsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_knn_modular.py

# This example shows usage of a k-nearest neighbor (KNN) classification rule on
# a toy data set. The number of the nearest neighbors is set to k=3 and the distances
# are measured by the Euclidean metric. Finally, the KNN rule is applied to predict
# labels of test examples. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat,testdat,label_traindat,3],[traindat,testdat,label_traindat,3]]

def classifier_knn_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat, k=3 ):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import KNN
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	distance=EuclideanDistance(feats_train, feats_train)


	labels=MulticlassLabels(label_train_multiclass)

	knn=KNN(k, distance, labels)
	knn_train = knn.train()
	output=knn.apply(feats_test).get_labels()
	multiple_k=knn.classify_for_multiple_k()
	return knn,knn_train,output,multiple_k

if __name__=='__main__':
	print('KNN')
	classifier_knn_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_larank_modular.py

# In this example a multi-class support vector machine classifier is trained on a
# toy data set and the trained classifier is then used to predict labels of test
# examples. As training algorithm the LaRank algorithm is used with SVM
# regularization parameter C=1 and a Gaussian kernel of width 2.1 and a precision
# set to epsilon=1e-5.
# 
# For more details on LaRank see
#    Bordes, A. and Bottou, L. and Gallinari, P. and Weston, J.
#    Solving MultiClass Support Vector Machines with LaRank. ICML 2007.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1,2000],[traindat,testdat,label_traindat,3,1,5000]]

def classifier_larank_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,C=0.9,num_threads=1,num_iter=5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LaRank
	from shogun.Mathematics import Math_init_random
	Math_init_random(17)

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	width=2.1
	kernel=GaussianKernel(feats_train, feats_train, width)

	epsilon=1e-5
	labels=MulticlassLabels(label_train_multiclass)

	svm=LaRank(C, kernel, labels)
	#svm.set_tau(1e-3)
	svm.set_batch_mode(False)
	#svm.io.enable_progress()
	svm.set_epsilon(epsilon)
	svm.train()
	out=svm.apply(feats_train).get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()


if __name__=='__main__':
	print('LaRank')
	[predictions, svm, labels] = classifier_larank_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_lda_modular.py

# In this example a two-class linear classifier based on the Linear Discriminant
# Analysis (LDA) is trained on a toy data set and then the trained classifier is
# used to predict test examples. The regularization parameter, which corresponds
# to a weight of a unitary matrix added to the covariance matrix, is set to
# gamma=3.
# 
# For more details on the LDA see e.g.
#     http://en.wikipedia.org/wiki/Linear_discriminant_analysis

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,3,1],[traindat,testdat,label_traindat,4,1]]

def classifier_lda_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,gamma=3,num_threads=1):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import LDA

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=BinaryLabels(label_train_twoclass)

	lda=LDA(gamma, feats_train, labels)
	lda.train()

	lda.get_bias()
	lda.get_w()
	lda.set_features(feats_test)
	lda.apply().get_labels()
	return lda,lda.apply().get_labels()

if __name__=='__main__':
	print('LDA')
	classifier_lda_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_liblinear_modular.py

# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is then used to predict labels of
# test examples. As training algorithm the LIBLINEAR solver is used with the SVM
# regularization parameter C=0.9 and the bias in the classification rule switched
# on and the precision parameters epsilon=1e-5.
# 
# For more details on LIBLINEAR see
#     http://www.csie.ntu.edu.tw/~cjlin/liblinear/

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1e-3],[traindat,testdat,label_traindat,0.8,1e-2]]

def classifier_liblinear_modular (fm_train_real, fm_test_real,
		label_train_twoclass, C, epsilon):

	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
	from shogun.Mathematics import Math_init_random
	Math_init_random(17)

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	labels=BinaryLabels(label_train_twoclass)

	svm=LibLinear(C, feats_train, labels)
	svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
	svm.set_epsilon(epsilon)
	svm.set_bias_enabled(True)
	svm.train()

	svm.set_features(feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()



if __name__=='__main__':
	print('LibLinear')
	classifier_liblinear_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_libsvm_minimal_modular.py

# In this example a two-class support vector machine classifier is trained on a
# 2-dimensional randomly generated data set and the trained classifier is used to
# predict labels of test examples. As training algorithm the LIBSVM solver is used
# with SVM regularization parameter C=1 and a Gaussian kernel of width 2.1.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

#!/usr/bin/env python
from numpy import mean, sign

from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,2.1,1]]

def classifier_libsvm_minimal_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,width=2.1,C=1):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import LibSVM
	from shogun.Kernel import GaussianKernel

	feats_train=RealFeatures(fm_train_real);
	feats_test=RealFeatures(fm_test_real);
	kernel=GaussianKernel(feats_train, feats_train, width);

	labels=BinaryLabels(label_train_twoclass);
	svm=LibSVM(C, kernel, labels);
	svm.train();

	kernel.init(feats_train, feats_test);
	out=svm.apply().get_labels();
	testerr=mean(sign(out)!=label_train_twoclass)
	print(testerr)

if __name__=='__main__':
	print('LibSVM Minimal')
	classifier_libsvm_minimal_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_libsvm_modular.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the LIBSVM solver is used with SVM
# regularization parameter C=1 and a Gaussian kernel of width 2.1 and the
# precision parameter epsilon=1e-5. The example also shows how to retrieve the
# support vectors from the train SVM model.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_libsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LibSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	kernel=GaussianKernel(feats_train, feats_train, width)
	labels=BinaryLabels(label_train_twoclass)

	svm=LibSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	labels = svm.apply().get_labels()
	supportvectors = sv_idx=svm.get_support_vectors()
	alphas=svm.get_alphas()
	predictions = svm.apply()
	print predictions.get_labels()
	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('LibSVM')
	classifier_libsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_libsvmoneclass_modular.py

# In this example a one-class support vector machine classifier is trained on a
# toy data set. The training algorithm finds a hyperplane in the RKHS which
# separates the training data from the origin. The one-class classifier is
# typically used to estimate the support of a high-dimesnional distribution. 
# For more details see e.g. 
#   B. Schoelkopf et al. Estimating the support of a high-dimensional
#   distribution. Neural Computation, 13, 2001, 1443-1471. 
# 
# In the example, the one-class SVM is trained by the LIBSVM solver with the
# regularization parameter C=1 and the Gaussian kernel of width 2.1 and the
# precision parameter epsilon=1e-5.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,2.2,1,1e-7],[traindat,testdat,2.1,1,1e-5]]

def classifier_libsvmoneclass_modular (fm_train_real=traindat,fm_test_real=testdat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LibSVMOneClass

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=GaussianKernel(feats_train, feats_train, width)

	svm=LibSVMOneClass(C, kernel)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()

	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('LibSVMOneClass')
	classifier_libsvmoneclass_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_mpdsvm_modular.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the Minimal Primal Dual SVM is used with SVM
# regularization parameter C=1 and a Gaussian kernel of width 1.2 and the
# precision parameter 1e-5. 
# 
# For more details on the MPD solver see 
#  Kienzle, W. and B. Sch�lkopf: Training Support Vector Machines with Multiple
#  Equality Constraints. Machine Learning: ECML 2005, 182-193. (Eds.) Carbonell,
#  J. G., J. Siekmann, Springer, Berlin, Germany (11 2005)

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,1,1e-5],[traindat,testdat,label_traindat,0.9,1e-5]]

def classifier_mpdsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=1,epsilon=1e-5):

	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import MPDSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	width=2.1
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=BinaryLabels(label_train_twoclass)

	svm=MPDSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('MPDSVM')
	classifier_mpdsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_ecoc.py

#!/usr/bin/env python
import re
import time
from tools.multiclass_shared import prepare_data

# run with toy data
[traindat, label_traindat, testdat, label_testdat] = prepare_data()
# run with opt-digits if available
#[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5]]

def classifier_multiclass_ecoc (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):

	import shogun.Classifier as Classifier
	from shogun.Classifier import ECOCStrategy, LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
	from shogun.Evaluation import MulticlassAccuracy
	from shogun.Features import RealFeatures, MulticlassLabels

	def nonabstract_class(name):
		try:
		    getattr(Classifier, name)()
		except TypeError:
		    return False
		return True

	encoders = [x for x in dir(Classifier)
		    if re.match(r'ECOC.+Encoder', x) and nonabstract_class(x)]
	decoders = [x for x in dir(Classifier)
		    if re.match(r'ECOC.+Decoder', x) and nonabstract_class(x)]

	fea_train = RealFeatures(fm_train_real)
	fea_test  = RealFeatures(fm_test_real)
	gnd_train = MulticlassLabels(label_train_multiclass)
	if label_test_multiclass is None:
		gnd_test = None
	else:
		gnd_test = MulticlassLabels(label_test_multiclass)

	base_classifier = LibLinear(L2R_L2LOSS_SVC)
	base_classifier.set_bias_enabled(True)

	print('Testing with %d encoders and %d decoders' % (len(encoders), len(decoders)))
	print('-' * 70)
	format_str = '%%15s + %%-10s  %%-10%s %%-10%s %%-10%s'
	print((format_str % ('s', 's', 's')) % ('encoder', 'decoder', 'codelen', 'time', 'accuracy'))

	def run_ecoc(ier, idr):
		encoder = getattr(Classifier, encoders[ier])()
		decoder = getattr(Classifier, decoders[idr])()

		# whether encoder is data dependent
		if hasattr(encoder, 'set_labels'):
		    encoder.set_labels(gnd_train)
		    encoder.set_features(fea_train)

		strategy = ECOCStrategy(encoder, decoder)
		classifier = LinearMulticlassMachine(strategy, fea_train, base_classifier, gnd_train)
		classifier.train()
		label_pred = classifier.apply(fea_test)
		if gnd_test is not None:
		    evaluator = MulticlassAccuracy()
		    acc = evaluator.evaluate(label_pred, gnd_test)
		else:
		    acc = None

		return (classifier.get_num_machines(), acc)


	for ier in range(len(encoders)):
		for idr in range(len(decoders)):
		    t_begin = time.clock()
		    (codelen, acc) = run_ecoc(ier, idr)
		    if acc is None:
		        acc_fmt = 's'
		        acc = 'N/A'
		    else:
		        acc_fmt = '.4f'

		    t_elapse = time.clock() - t_begin
		    print((format_str % ('d', '.3f', acc_fmt)) % 
		            (encoders[ier][4:-7], decoders[idr][4:-7], codelen, t_elapse, acc))

if __name__=='__main__':
    print('MulticlassECOC')
    classifier_multiclass_ecoc(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_ecoc_discriminant.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclass_ecoc_discriminant (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCDiscriminantEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test  = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    encoder = ECOCDiscriminantEncoder()
    encoder.set_features(feats_train)
    encoder.set_labels(labels)
    encoder.set_sffs_iterations(50)

    strategy = ECOCStrategy(encoder, ECOCHDDecoder())

    classifier = LinearMulticlassMachine(strategy, feats_train, classifier, labels)
    classifier.train()
    label_pred = classifier.apply(feats_test)
    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

if __name__=='__main__':
    print('MulticlassMachine')
    classifier_multiclass_ecoc_discriminant(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_ecoc_ovr.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data
# run with toy data
[traindat, label_traindat, testdat, label_testdat] = prepare_data()

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclass_ecoc_ovr (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
	from shogun.Classifier import ECOCStrategy, ECOCOVREncoder, ECOCLLBDecoder, MulticlassOneVsRestStrategy

	feats_train = RealFeatures(fm_train_real)
	feats_test  = RealFeatures(fm_test_real)

	labels = MulticlassLabels(label_train_multiclass)

	classifier = LibLinear(L2R_L2LOSS_SVC)
	classifier.set_epsilon(epsilon)
	classifier.set_bias_enabled(True)

	mc_classifier = LinearMulticlassMachine(MulticlassOneVsRestStrategy(), feats_train, classifier, labels)
	mc_classifier.train()
	label_mc = mc_classifier.apply(feats_test)
	out_mc = label_mc.get_labels()

	ecoc_strategy = ECOCStrategy(ECOCOVREncoder(), ECOCLLBDecoder())
	ecoc_classifier = LinearMulticlassMachine(ecoc_strategy, feats_train, classifier, labels)
	ecoc_classifier.train()
	label_ecoc = ecoc_classifier.apply(feats_test)
	out_ecoc = label_ecoc.get_labels() 

	n_diff = (out_mc != out_ecoc).sum()
	if n_diff == 0:
		print("Same results for OvR and ECOCOvR")
	else:
		print("Different results for OvR and ECOCOvR (%d out of %d are different)" % (n_diff, len(out_mc)))

	if label_test_multiclass is not None:
		from shogun.Evaluation import MulticlassAccuracy
		labels_test = MulticlassLabels(label_test_multiclass)
		evaluator = MulticlassAccuracy()
		acc_mc = evaluator.evaluate(label_mc, labels_test)
		acc_ecoc = evaluator.evaluate(label_ecoc, labels_test)
		print('Normal OVR Accuracy = %.4f' % acc_mc)
		print('ECOC OVR Accuracy   = %.4f' % acc_ecoc)

	return out_ecoc, out_mc

if __name__=='__main__':
	print('MulticlassMachine')
	classifier_multiclass_ecoc_ovr(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_ecoc_random.py

#!/usr/bin/env python
from os.path import exists
from tools.load import LoadMatrix
lm=LoadMatrix()

if exists('../data/../mldata/uci-20070111-optdigits.mat'):
    from scipy.io import loadmat

    mat = loadmat('../data/../mldata/uci-20070111-optdigits.mat')['int0'].astype(float)
    X = mat[:-1,:]
    Y = mat[-1,:]
    isplit = X.shape[1]/2
    traindat = X[:,:isplit]
    label_traindat = Y[:isplit]
    testdat = X[:, isplit:]
    label_testdat = Y[isplit:]
else:
    traindat = lm.load_numbers('../data/fm_train_real.dat')
    testdat  = lm.load_numbers('../data/fm_test_real.dat')
    label_traindat = lm.load_labels('../data/label_train_multiclass.dat')
    label_testdat = None

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclass_ecoc_random (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    from shogun.Features import RealFeatures, MulticlassLabels
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine
    from shogun.Classifier import ECOCStrategy, ECOCRandomSparseEncoder, ECOCRandomDenseEncoder, ECOCHDDecoder

    feats_train = RealFeatures(fm_train_real)
    feats_test  = RealFeatures(fm_test_real)

    labels = MulticlassLabels(label_train_multiclass)

    classifier = LibLinear(L2R_L2LOSS_SVC)
    classifier.set_epsilon(epsilon)
    classifier.set_bias_enabled(True)

    rnd_dense_strategy = ECOCStrategy(ECOCRandomDenseEncoder(), ECOCHDDecoder())
    rnd_sparse_strategy = ECOCStrategy(ECOCRandomSparseEncoder(), ECOCHDDecoder())

    dense_classifier = LinearMulticlassMachine(rnd_dense_strategy, feats_train, classifier, labels)
    dense_classifier.train()
    label_dense = dense_classifier.apply(feats_test)
    out_dense = label_dense.get_labels()

    sparse_classifier = LinearMulticlassMachine(rnd_sparse_strategy, feats_train, classifier, labels)
    sparse_classifier.train()
    label_sparse = sparse_classifier.apply(feats_test)
    out_sparse = label_sparse.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc_dense = evaluator.evaluate(label_dense, labels_test)
        acc_sparse = evaluator.evaluate(label_sparse, labels_test)
        print('Random Dense Accuracy  = %.4f' % acc_dense)
        print('Random Sparse Accuracy = %.4f' % acc_sparse)

    return out_sparse, out_dense

if __name__=='__main__':
    print('MulticlassMachine')
    classifier_multiclass_ecoc_random(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_relaxedtree.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(True)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclass_relaxedtree (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import RelaxedTree, MulticlassLibLinear
	from shogun.Kernel import GaussianKernel

	print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

	feats_train = RealFeatures(fm_train_real)

	labels = MulticlassLabels(label_train_multiclass)

	machine = RelaxedTree()
	machine.set_machine_for_confusion_matrix(MulticlassLibLinear())
	machine.set_kernel(GaussianKernel())
	machine.set_labels(labels)
	machine.train(feats_train)

	label_pred = machine.apply_multiclass(RealFeatures(fm_test_real))
	out = label_pred.get_labels()

	if label_test_multiclass is not None:
		from shogun.Evaluation import MulticlassAccuracy
		labels_test = MulticlassLabels(label_test_multiclass)
		evaluator = MulticlassAccuracy()
		acc = evaluator.evaluate(label_pred, labels_test)
		print('Accuracy = %.4f' % acc)

	return out

if __name__=='__main__':
	print('MulticlassMachine')
	classifier_multiclass_relaxedtree(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclass_shareboost.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclass_shareboost (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,lawidth=2.1,C=1,epsilon=1e-5):
    from shogun.Features import RealFeatures, RealSubsetFeatures, MulticlassLabels
    from shogun.Classifier import ShareBoost

    print('Working on a problem of %d features and %d samples' % fm_train_real.shape)

    feats_train = RealFeatures(fm_train_real)

    labels = MulticlassLabels(label_train_multiclass)

    shareboost = ShareBoost(feats_train, labels, min(fm_train_real.shape[0]-1, 30))
    shareboost.train();
    print(shareboost.get_activeset())

    feats_test  = RealSubsetFeatures(RealFeatures(fm_test_real), shareboost.get_activeset())
    label_pred = shareboost.apply(feats_test)

    out = label_pred.get_labels()

    if label_test_multiclass is not None:
        from shogun.Evaluation import MulticlassAccuracy
        labels_test = MulticlassLabels(label_test_multiclass)
        evaluator = MulticlassAccuracy()
        acc = evaluator.evaluate(label_pred, labels_test)
        print('Accuracy = %.4f' % acc)

    return out

if __name__=='__main__':
    print('MulticlassMachine')
    classifier_multiclass_shareboost(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclassliblinear_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclassliblinear_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import MulticlassLibLinear

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = MulticlassLibLinear(C,feats_train,labels)
	classifier.train()

	label_pred = classifier.apply(feats_test)
	out = label_pred.get_labels()

	if label_test_multiclass is not None:
		from shogun.Evaluation import MulticlassAccuracy
		labels_test = MulticlassLabels(label_test_multiclass)
		evaluator = MulticlassAccuracy()
		acc = evaluator.evaluate(label_pred, labels_test)
		print('Accuracy = %.4f' % acc)
	
	return out

if __name__=='__main__':
	print('MulticlassLibLinear')
	classifier_multiclassliblinear_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclasslibsvm_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_multiclasslibsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import MulticlassLibSVM

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	svm=MulticlassLibSVM(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.train()

	kernel.init(feats_train, feats_test)
	out = svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('MulticlassLibSVM')
	[predictions, svm, labels] = classifier_multiclasslibsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclasslinearmachine_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclasslinearmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC, LinearMulticlassMachine, MulticlassOneVsOneStrategy, MulticlassOneVsRestStrategy

	feats_train = RealFeatures(fm_train_real)
	feats_test  = RealFeatures(fm_test_real)

	labels = MulticlassLabels(label_train_multiclass)
	
	classifier = LibLinear(L2R_L2LOSS_SVC)
	classifier.set_epsilon(epsilon)
	classifier.set_bias_enabled(True)
	mc_classifier = LinearMulticlassMachine(MulticlassOneVsOneStrategy(), feats_train, classifier, labels)

	mc_classifier.train()
	label_pred = mc_classifier.apply()
	out = label_pred.get_labels()

	if label_test_multiclass is not None:
		from shogun.Evaluation import MulticlassAccuracy
		labels_test = MulticlassLabels(label_test_multiclass)
		evaluator = MulticlassAccuracy()
		acc = evaluator.evaluate(label_pred, labels_test)
		print('Accuracy = %.4f' % acc)

	return out

if __name__=='__main__':
	print('MulticlassMachine')
	classifier_multiclasslinearmachine_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclassmachine_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_multiclassmachine_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.Classifier import LibSVM, KernelMulticlassMachine, MulticlassOneVsRestStrategy

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = LibSVM()
	classifier.set_epsilon(epsilon)
	#print labels.get_labels()
	mc_classifier = KernelMulticlassMachine(MulticlassOneVsRestStrategy(),kernel,classifier,labels)
	mc_classifier.train()

	kernel.init(feats_train, feats_test)
	out = mc_classifier.apply().get_labels()
	return out

if __name__=='__main__':
	print('MulticlassMachine')
	classifier_multiclassmachine_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclassmultipleoutputliblinear_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat,testdat,label_traindat,label_testdat,2.1,1,1e-5],[traindat,testdat,label_traindat,label_testdat,2.2,1,1e-5]]

def classifier_multiclassmultipleoutputliblinear_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,label_test_multiclass=label_testdat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels, MulticlassMultipleOutputLabels
	from shogun.Classifier import MulticlassLibLinear

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = MulticlassLibLinear(C,feats_train,labels)
	classifier.train()

	label_pred = classifier.apply_multiclass_multiple_output(feats_test,2)
	out = label_pred.get_labels()
	#print out
	return out

if __name__=='__main__':
	print('MulticlassMultipleOutputLibLinear')
	classifier_multiclassmultipleoutputliblinear_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_multiclassocas_modular.py

#!/usr/bin/env python
from tools.multiclass_shared import prepare_data

[traindat, label_traindat, testdat, label_testdat] = prepare_data()

parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]]

def classifier_multiclassocas_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import MulticlassOCAS

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=MulticlassLabels(label_train_multiclass)

	classifier = MulticlassOCAS(C,feats_train,labels)
	classifier.train()

	out = classifier.apply(feats_test).get_labels()
	return out

if __name__=='__main__':
	print('MulticlassOCAS')
	classifier_multiclassocas_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_perceptron_modular.py

# This example shows usage of the Perceptron algorithm for training a two-class
# linear classifier, i.e.  y = sign( <x,w>+b). The Perceptron algorithm works by
# iteratively passing though the training examples and applying the update rule on
# those examples which are misclassified by the current classifier. The Perceptron
# update rule reads
# 
#   w(t+1) = w(t) + alpha * y_t * x_t
#   b(t+1) = b(t) + alpha * y_t
# 
# where (x_t,y_t) is feature vector and label (must be +1/-1) of the misclassified example
#       (w(t),b(t)) are the current parameters of the linear classifier
#       (w(t+1),b(t+1)) are the new parameters of the linear classifier
#       alpha is the learning rate; in this examples alpha=1
# 
# The Perceptron algorithm iterates until all training examples are correctly
# classified or the prescribed maximal number of iterations, in this example
# max_iter=1000, is reached.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,1.,1000,1],[traindat,testdat,label_traindat,1.,1000,1]]

def classifier_perceptron_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,learn_rate=1.,max_iter=1000,num_threads=1):
	from shogun.Features import RealFeatures, BinaryLabels
	from shogun.Classifier import Perceptron

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	labels=BinaryLabels(label_train_twoclass)

	perceptron=Perceptron(feats_train, labels)
	perceptron.set_learn_rate(learn_rate)
	perceptron.set_max_iter(max_iter)
	# only guaranteed to converge for separable data
	perceptron.train()

	perceptron.set_features(feats_test)
	out_labels = perceptron.apply().get_labels()
	return perceptron, out_labels

if __name__=='__main__':
	print('Perceptron')
	classifier_perceptron_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_qda_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list = [[traindat, testdat, label_traindat, 1e-4, False], \
		  [traindat, testdat, label_traindat, 1e-4, True]]

def classifier_qda_modular (fm_train_real=traindat, fm_test_real=testdat, label_train_twoclass=label_traindat, tolerance=1e-4, store_covs=False):
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import QDA

	feats_train = RealFeatures(fm_train_real)
	feats_test  = RealFeatures(fm_test_real)

	labels = MulticlassLabels(label_train_twoclass)

	qda = QDA(feats_train, labels, tolerance, store_covs)
	qda.train()

	qda.apply(feats_test).get_labels()
	qda.set_features(feats_test)
	return qda, qda.apply().get_labels()

if __name__=='__main__':
	print('QDA')
	classifier_qda_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_subgradientsvm_modular.py

# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the steepest descent subgradient algorithm is
# used. The SVM regularization parameter is set to C=0.9 and the bias in the
# classification rule is switched off. The solver iterates until it finds an
# epsilon-precise solution (epsilon=1e-3) or the maximal training time
# max_train_time=1 (seconds) is exceeded. The unbiased linear rule is trained.
# 
# Note that this solver often does not converges because the steepest descent
# subgradient algorithm is oversensitive to rounding errors. Note also that this
# is an unpublished work which was predecessor of the OCAS solver (see
# classifier_svmocas).

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
train=lm.load_numbers('../data/fm_train_real.dat')
test=lm.load_numbers('../data/fm_test_real.dat')
labels=lm.load_labels('../data/label_train_twoclass.dat')

parameter_list=[[train,test,labels,5,1e-3,3.0], [train,test,labels,0.9,1e-2,1.0]]

def classifier_subgradientsvm_modular (fm_train_real, fm_test_real,
		label_train_twoclass, C, epsilon, max_train_time):

	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import SubGradientSVM

	realfeat=RealFeatures(fm_train_real)
	feats_train=SparseRealFeatures()
	feats_train.obtain_from_simple(realfeat)
	realfeat=RealFeatures(fm_test_real)
	feats_test=SparseRealFeatures()
	feats_test.obtain_from_simple(realfeat)

	labels=BinaryLabels(label_train_twoclass)

	svm=SubGradientSVM(C, feats_train, labels)
	svm.set_epsilon(epsilon)
	svm.set_max_train_time(max_train_time)
	svm.train()

	svm.set_features(feats_test)
	labels=svm.apply().get_labels()

	return labels, svm

if __name__=='__main__':
	print('SubGradientSVM')
	classifier_subgradientsvm_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmlight_batch_linadd_modular.py

# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and the trained classifier is used to predict
# labels on test set. As training algorithm SVM^light is used with SVM
# regularization parameter C=1 and the Weighted Degree kernel of the degree 20 and
# a precision parameter epsilon=1e-5. The LINADD trick is used to speed up
# training.
# 
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
# 
# For more details on the Weighted Degree kernel and the LINADD trick see
#   Sonnenburg, s. and R�tsch, G. and Rieck, K. Large Scale Learning with String
#   Kernels. In Bottou, Leon and Chapelle, Olivier and DeCoste, Dennis and Weston,
#   Jason, editor, In Large Scale Kernel Machines, pages 73-103, MIT Press,
#   Cambridge, MA. 2007.  
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

train_dna=lm.load_dna('../data/fm_train_dna.dat')
test_dna=lm.load_dna('../data/fm_test_dna.dat')
label=lm.load_labels('../data/label_train_dna.dat')

parameter_list=[[train_dna, test_dna, label, 20, 0.9, 1e-3, 1],
		[train_dna, test_dna, label, 20, 2.3, 1e-5, 4]]

def classifier_svmlight_batch_linadd_modular (fm_train_dna, fm_test_dna,
		label_train_dna, degree, C, epsilon, num_threads):

	from shogun.Features import StringCharFeatures, BinaryLabels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print('No support for SVMLight available.')
		return

	feats_train=StringCharFeatures(DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=BinaryLabels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)

	#print('SVMLight Objective: %f num_sv: %d' % \)
	#	(svm.get_objective(), svm.get_num_support_vectors())
	svm.set_batch_computation_enabled(False)
	svm.set_linadd_enabled(False)
	svm.apply().get_labels()

	svm.set_batch_computation_enabled(True)
	labels = svm.apply().get_labels()
	return labels, svm


if __name__=='__main__':
	print('SVMlight batch')
	classifier_svmlight_batch_linadd_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmlight_linear_term_modular.py

# This example demonstrates how to train an SVMLight classifier
# using a custom linear term. This is used in the class DASVM that
# pre-computes this linear term using a previously trained SVM.
# 

#!/usr/bin/env python
import numpy

traindna=['CGCACGTACGTAGCTCGAT',
		      'CGACGTAGTCGTAGTCGTA',
		      'CGACGGGGGGGGGGTCGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGACCACAGTTATATAGTA',
		      'CGACGTAGTCGTAGTCGTA',
		      'CGACGTAGTTTTTTTCGTA',
		      'CGACGTAGTCGTAGCCCCA',
		      'CAAAAAAAAAAAAAAAATA',
		      'CGACGGGGGGGGGGGCGTA']
label_traindna=numpy.array(5*[-1.0] + 5*[1.0])
testdna=['AGCACGTACGTAGCTCGAT',
		      'AGACGTAGTCGTAGTCGTA',
		      'CAACGGGGGGGGGGTCGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGAACACAGTTATATAGTA',
		      'CGACCTAGTCGTAGTCGTA',
		      'CGACGTGGGGTTTTTCGTA',
		      'CGACGTAGTCCCAGCCCCA',
		      'CAAAAAAAAAAAACCAATA',
		      'CGACGGCCGGGGGGGCGTA']
label_test_dna=numpy.array(5*[-1.0] + 5*[1.0])

parameter_list = [[traindna,testdna,label_traindna,3,10,1e-5,1],[traindna,testdna,label_traindna,3,10,1e-5,1]] 

def classifier_svmlight_linear_term_modular (fm_train_dna=traindna,fm_test_dna=testdna, \
                                                label_train_dna=label_traindna,degree=3, \
                                                C=10,epsilon=1e-5,num_threads=1):
    
    from shogun.Features import StringCharFeatures, BinaryLabels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    from shogun.Classifier import SVMLight
    
    feats_train=StringCharFeatures(DNA)
    feats_train.set_features(fm_train_dna)
    feats_test=StringCharFeatures(DNA)
    feats_test.set_features(fm_test_dna)
    
    kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)
    
    labels=BinaryLabels(label_train_dna)
    
    svm=SVMLight(C, kernel, labels)
    svm.set_qpsize(3)
    svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double));
    svm.set_epsilon(epsilon)
    svm.parallel.set_num_threads(num_threads)
    svm.train()
    
    kernel.init(feats_train, feats_test)
    out = svm.apply().get_labels()
    return out,kernel

if __name__=='__main__':
    print('SVMLight')
    classifier_svmlight_linear_term_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmlight_modular.py

# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and the trained classifier is used to predict
# labels on test set. As training algorithm SVM^light is used with SVM
# regularization parameter C=1.2 and the Weighted Degree kernel of degree 20 and
# the precision parameter epsilon=1e-5.
# 
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
# 
# For more details on the Weighted Degree kernel see
#  G. Raetsch, S.Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
#  spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')

parameter_list = [[traindat,testdat,label_traindat,1.1,1e-5,1],[traindat,testdat,label_traindat,1.2,1e-5,1]]

def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1):
	from shogun.Features import StringCharFeatures, BinaryLabels, DNA
	from shogun.Kernel import WeightedDegreeStringKernel
	try:
		from shogun.Classifier import SVMLight
	except ImportError:
		print('No support for SVMLight available.')
		return

	feats_train=StringCharFeatures(DNA)
	feats_train.set_features(fm_train_dna)
	feats_test=StringCharFeatures(DNA)
	feats_test.set_features(fm_test_dna)
	degree=20

	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	labels=BinaryLabels(label_train_dna)

	svm=SVMLight(C, kernel, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.train()

	kernel.init(feats_train, feats_test)
	svm.apply().get_labels()
	return kernel
if __name__=='__main__':
	print('SVMLight')
	classifier_svmlight_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmlin_modular.py

# In this example a two-class linear support vector machine classifier (SVM) is
# trained on a toy data set and the trained classifier is used to predict labels
# of test examples. As training algorithm the SVMLIN solver is used with the SVM
# regularization parameter C=0.9 and the bias in the classification rule switched
# on and the precision parameter epsilon=1e-5. The example also shows how to
# retrieve parameters (vector w and bias b)) of the trained linear classifier.
# 
# For more details on the SVMLIN solver see
#  V. Sindhwani, S.S. Keerthi. Newton Methods for Fast Solution of Semi-supervised
#  Linear SVMs. Large Scale Kernel Machines MIT Press (Book Chapter), 2007

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]]

def classifier_svmlin_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,epsilon=1e-5,num_threads=1):
	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import SVMLin

	realfeat=RealFeatures(fm_train_real)
	feats_train=SparseRealFeatures()
	feats_train.obtain_from_simple(realfeat)
	realfeat=RealFeatures(fm_test_real)
	feats_test=SparseRealFeatures()
	feats_test.obtain_from_simple(realfeat)

	labels=BinaryLabels(label_train_twoclass)

	svm=SVMLin(C, feats_train, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.set_bias_enabled(True)
	svm.train()

	svm.set_features(feats_test)
	svm.get_bias()
	svm.get_w()
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()


if __name__=='__main__':
	print('SVMLin')
	classifier_svmlin_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmocas_modular.py

# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the OCAS solver is used with the SVM
# regularization parameter C=0.9 and the bias term in the classification rule
# switched off and the precision parameter epsilon=1e-5 (duality gap).
# 
# For more details on the OCAS solver see
#  V. Franc, S. Sonnenburg. Optimized Cutting Plane Algorithm for Large-Scale Risk
#  Minimization.The Journal of Machine Learning Research, vol. 10,
#  pp. 2157--2192. October 2009. 
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]]

def classifier_svmocas_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,epsilon=1e-5,num_threads=1):

	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import SVMOcas

	realfeat=RealFeatures(fm_train_real)
	feats_train=SparseRealFeatures()
	feats_train.obtain_from_simple(realfeat)
	realfeat=RealFeatures(fm_test_real)
	feats_test=SparseRealFeatures()
	feats_test.obtain_from_simple(realfeat)

	labels=BinaryLabels(label_train_twoclass)

	svm=SVMOcas(C, feats_train, labels)
	svm.set_epsilon(epsilon)
	svm.parallel.set_num_threads(num_threads)
	svm.set_bias_enabled(False)
	svm.train()

	svm.set_features(feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('SVMOcas')
	classifier_svmocas_modular(*parameter_list[0])

../examples/documented/python_modular/classifier_svmsgd_modular.py

# In this example a two-class linear support vector machine classifier is trained
# on a toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the Stochastic Gradient Descent (SGD) solver is
# used with the SVM regularization parameter C=0.9. The number of iterations, i.e.
# passes though all training examples, is set to num_iter=5 .
# 
# For more details on the SGD solver see
#  L. Bottou, O. Bousquet. The tradeoff of large scale learning. In NIPS 20. MIT
#  Press. 2008.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1,6],[traindat,testdat,label_traindat,0.8,1,5]]

def classifier_svmsgd_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,num_threads=1,num_iter=5):

	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import SVMSGD

	realfeat=RealFeatures(fm_train_real)
	feats_train=SparseRealFeatures()
	feats_train.obtain_from_simple(realfeat)
	realfeat=RealFeatures(fm_test_real)
	feats_test=SparseRealFeatures()
	feats_test.obtain_from_simple(realfeat)

	labels=BinaryLabels(label_train_twoclass)

	svm=SVMSGD(C, feats_train, labels)
	svm.set_epochs(num_iter)
	#svm.io.set_loglevel(0)
	svm.train()

	svm.set_features(feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()
	return predictions, svm, predictions.get_labels()



if __name__=='__main__':
	print('SVMSGD')
	classifier_svmsgd_modular(*parameter_list[0])

Clustering

../examples/documented/python_modular/clustering_hierarchical_modular.py

# In this example an agglomerative hierarchical single linkage clustering method
# is used to cluster a given toy data set. Starting with each object being
# assigned to its own cluster clusters are iteratively merged. Here the clusters
# are merged that have the closest (minimum distance, here set via the Euclidean
# distance object) two elements.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[traindat,3],[traindat,4]]

def clustering_hierarchical_modular (fm_train=traindat,merges=3):

	from shogun.Distance import EuclideanDistance
	from shogun.Features import RealFeatures
	from shogun.Clustering import Hierarchical


	feats_train=RealFeatures(fm_train)
	distance=EuclideanDistance(feats_train, feats_train)

	hierarchical=Hierarchical(merges, distance)
	hierarchical.train()

	out_distance = hierarchical.get_merge_distances()
	out_cluster = hierarchical.get_cluster_pairs()

	return hierarchical,out_distance,out_cluster 

if __name__=='__main__':
	print('Hierarchical')
	clustering_hierarchical_modular(*parameter_list[0])

../examples/documented/python_modular/clustering_kmeans_modular.py

# In this example the k-means clustering method is used to cluster a given toy
# data set. In k-means clustering one tries to partition n observations into k
# clusters in which each observation belongs to the cluster with the nearest mean.
# The algorithm class constructor takes the number of clusters and a distance to
# be used as input. The distance used in this example is Euclidean distance.
# After training one can fetch the result of clustering by obtaining the cluster
# centers and their radiuses.

#!/usr/bin/env python
##!/usr/bin/env python
#"""
#Explicit examples on how to use clustering
#"""
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[traindat,3],[traindat,4]]

def clustering_kmeans_modular (fm_train=traindat,k=3):

	from shogun.Distance import EuclideanDistance
	from shogun.Features import RealFeatures
	from shogun.Clustering import KMeans
	from shogun.Mathematics import Math_init_random
	Math_init_random(17)

	feats_train=RealFeatures(fm_train)
	distance=EuclideanDistance(feats_train, feats_train)

	kmeans=KMeans(k, distance)
	kmeans.train()

	out_centers = kmeans.get_cluster_centers()
	kmeans.get_radiuses()

	return out_centers, kmeans

if __name__=='__main__':
	print('KMeans')
	clustering_kmeans_modular(*parameter_list[0])

Converter

../examples/documented/python_modular/converter_diffusionmaps_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_diffusionmaps_modular (data,t):
	from shogun.Features import RealFeatures
	from shogun.Converter import DiffusionMaps
	from shogun.Kernel import GaussianKernel
	
	features = RealFeatures(data)
		
	converter = DiffusionMaps()
	converter.set_target_dim(1)
	converter.set_kernel(GaussianKernel(10,10.0))
	converter.set_t(t)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('DiffusionMaps')
	converter_diffusionmaps_modular(*parameter_list[0])

../examples/documented/python_modular/converter_hessianlocallylinearembedding_modular.py

# In this example toy data is being preprocessed using the Hessian Locally Linear Embedding algorithm
# as described in
# 
# Donoho, D., & Grimes, C. (2003). 
# Hessian eigenmaps: new tools for nonlinear dimensionality reduction.
# Proceedings of National Academy of Science (Vol. 100, pp. 5591-5596).

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_hessianlocallylinearembedding_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import HessianLocallyLinearEmbedding
	
	features = RealFeatures(data)
		
	converter = HessianLocallyLinearEmbedding()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('HessianLocallyLinearEmbedding')
	converter_hessianlocallylinearembedding_modular(*parameter_list[0])

../examples/documented/python_modular/converter_isomap_modular.py

# In this example toy data is being processed using the Isomap algorithm
# as described in
# 
# Silva, V. D., & Tenenbaum, J. B. (2003). 
# Global versus local methods in nonlinear dimensionality reduction. 
# Advances in Neural Information Processing Systems 15, 15(Figure 2), 721-728. MIT Press. 
# Retrieved from http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.9.3407&rep=rep1&type=pdf
# 
# Before applying to the data the landmark approximation is enabled with 
# specified number of landmarks. The landmark approximation is described in
# 
# Sparse multidimensional scaling using landmark points
# V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
# 
# After enabling the landmark approximation k parameter -- the number 
# of neighbors in the k nearest neighbor graph -- is initialized.

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data]]

def converter_isomap_modular (data):
	from shogun.Features import RealFeatures
	from shogun.Converter import Isomap
	
	features = RealFeatures(data)
		
	converter = Isomap()
	converter.set_landmark(True)
	converter.set_landmark_number(5)
	converter.set_k(6)
	converter.set_target_dim(1)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('Isomap')
	converter_isomap_modular(*parameter_list[0])

../examples/documented/python_modular/converter_kernellocallylinearembedding_modular.py

# In this example toy data is being processed using kernel extension
# of the Locally Linear Embedding (LLE) algorithm as described in
# 
# Kayo, O. (2006). Locally linear embedding algorithm. Extensions and applications. October.
# Retrieved from: http://herkules.oulu.fi/isbn9514280415/isbn9514280415.pd
# 
# Linear kernel is used as kernel of the extension.

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_kernellocallylinearembedding_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import KernelLocallyLinearEmbedding
	from shogun.Kernel import LinearKernel
	
	features = RealFeatures(data)
		
	kernel = LinearKernel()

	converter = KernelLocallyLinearEmbedding(kernel)
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('KernelLocallyLinearEmbedding')
	converter_kernellocallylinearembedding_modular(*parameter_list[0])

../examples/documented/python_modular/converter_kernellocaltangentspacealignment_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,60],[data,70]]

def converter_kernellocaltangentspacealignment_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import KernelLocalTangentSpaceAlignment
	
	features = RealFeatures(data)
		
	converter = KernelLocalTangentSpaceAlignment()
	converter.set_target_dim(1)
	converter.set_k(k)
	#converter.apply(features)

	return features


if __name__=='__main__':
	print('KernelLocalTangentSpaceAlignment')
	converter_kernellocaltangentspacealignment_modular(*parameter_list[0])

../examples/documented/python_modular/converter_laplacianeigenmaps_modular.py

# In this example toy data is being processed using Laplacian Eigenmaps
# algorithm as described in
# 
# Belkin, M., & Niyogi, P. (2002). 
# Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering. 
# Science, 14, 585-591. MIT Press. 
# Retrieved from http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.19.9400&rep=rep1&type=pdf
# 
# The number of neighbors for the kNN graph and the heat distribution
# coeffcient is set before processing the data

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_laplacianeigenmaps_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import LaplacianEigenmaps
	
	features = RealFeatures(data)
		
	converter = LaplacianEigenmaps()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.set_tau(2.0)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('LaplacianEigenmaps')
	converter_laplacianeigenmaps_modular(*parameter_list[0])

../examples/documented/python_modular/converter_linearlocaltangentspacealignment_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_linearlocaltangentspacealignment_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import LinearLocalTangentSpaceAlignment
	
	features = RealFeatures(data)
		
	converter = LinearLocalTangentSpaceAlignment()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('LinearLocalTangentSpaceAlignment')
	converter_linearlocaltangentspacealignment_modular(*parameter_list[0])

../examples/documented/python_modular/converter_localitypreservingprojections_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_localitypreservingprojections_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import LocalityPreservingProjections
	
	features = RealFeatures(data)
		
	converter = LocalityPreservingProjections()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.set_tau(2.0)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('LocalityPreservingProjections')
	converter_localitypreservingprojections_modular(*parameter_list[0])

../examples/documented/python_modular/converter_locallylinearembedding_modular.py

# In this example toy data is being preprocessed using the Locally Linear Embedding (LLE)
# algorithm as described in
# 
# Saul, L. K., Ave, P., Park, F., & Roweis, S. T. (2001).
# An Introduction to Locally Linear Embedding. Available from, 290(5500), 2323-2326.
# Retrieved from: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.123.7319&rep=rep1&type=pdf
# 
# The number of neighbors used during the linear reconstruction step of the algorithm is set
# before processing of the data.

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_locallylinearembedding_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import LocallyLinearEmbedding
	
	features = RealFeatures(data)
		
	converter = LocallyLinearEmbedding()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('LocallyLinearEmbedding')
	converter_locallylinearembedding_modular(*parameter_list[0])

../examples/documented/python_modular/converter_localtangentspacealignment_modular.py

# In this example toy data is being processed using the Local Tangent Space
# Alignment (LTSA) algorithms as described in
# 
# Zhang, Z., & Zha, H. (2002). Principal Manifolds 
# and Nonlinear Dimension Reduction via Local Tangent Space Alignment. 
# Journal of Shanghai University English Edition, 8(4), 406-424. SIAM. 
# Retrieved from http://arxiv.org/abs/cs/0212008
# 
# Before processing the number of neighbors for computing local tangent space
# is set

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,10],[data,20]]

def converter_localtangentspacealignment_modular (data,k):
	from shogun.Features import RealFeatures
	from shogun.Converter import LocalTangentSpaceAlignment
	
	features = RealFeatures(data)
		
	converter = LocalTangentSpaceAlignment()
	converter.set_target_dim(1)
	converter.set_k(k)
	converter.apply(features)

	return features


if __name__=='__main__':
	print('LocalTangentSpaceAlignment')
	converter_localtangentspacealignment_modular(*parameter_list[0])

../examples/documented/python_modular/converter_multidimensionalscaling_modular.py

# In this example toy data is being processed using the multidimensional
# scaling as described on p.261 (Section 12.1) of
# 
# Borg, I., & Groenen, P. J. F. (2005).
# Modern multidimensional scaling: Theory and applications. Springer.
# 
# Before processing the landmark approximation is disabled.

#!/usr/bin/env python
from tools.load import LoadMatrix
import numpy

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data]]

def converter_multidimensionalscaling_modular (data):
	from shogun.Features import RealFeatures
	from shogun.Converter import MultidimensionalScaling
	from shogun.Distance import EuclideanDistance
	
	features = RealFeatures(data)
		
	distance_before = EuclideanDistance()
	distance_before.init(features,features)

	converter = MultidimensionalScaling()
	converter.set_target_dim(2)
	converter.set_landmark(False)
	embedding =converter.apply(features)

	distance_after = EuclideanDistance()
	distance_after.init(embedding,embedding)

	distance_matrix_after = distance_after.get_distance_matrix()
	distance_matrix_before = distance_before.get_distance_matrix()

	return numpy.linalg.norm(distance_matrix_after-distance_matrix_before)/numpy.linalg.norm(distance_matrix_before)

if __name__=='__main__':
	print('MultidimensionalScaling')
	converter_multidimensionalscaling_modular(*parameter_list[0])

../examples/documented/python_modular/converter_stochasticproximityembedding_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data, 12]]

def converter_stochasticproximityembedding_modular (data, k):
	from shogun.Features import RealFeatures
	from shogun.Converter import StochasticProximityEmbedding, SPE_GLOBAL, SPE_LOCAL
	
	features = RealFeatures(data)
		
	converter = StochasticProximityEmbedding()
	converter.set_target_dim(1)
	converter.set_nupdates(40)
	# Embed with local strategy
	converter.set_k(k)
	converter.set_strategy(SPE_LOCAL)
	converter.embed(features)
	# Embed with global strategy
	converter.set_strategy(SPE_GLOBAL)
	converter.embed(features)

	return features

if __name__=='__main__':
	print('StochasticProximityEmbedding')
	converter_stochasticproximityembedding_modular(*parameter_list[0])

Distance

../examples/documented/python_modular/distance_braycurtis_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
# 
# Obviously, using the Bray Curtis distance is not limited to this showcase 
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_braycurtis_modular (fm_train_real=traindat,fm_test_real=testdat):


	from shogun.Features import RealFeatures
	from shogun.Distance import BrayCurtisDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=BrayCurtisDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('BrayCurtisDistance')
	distance_braycurtis_modular(*parameter_list[0])

../examples/documented/python_modular/distance_canberra_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance (dissimilarity ratio) matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CCanberraMetric.html.
# 
# Obviously, using the Canberra distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_canberra_modular (fm_train_real=traindat,fm_test_real=testdat):
	from shogun.Features import RealFeatures
	from shogun.Distance import CanberraMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=CanberraMetric(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test

if __name__=='__main__':
	print('CanberaMetric')
	distance_canberra_modular(*parameter_list[0])

../examples/documented/python_modular/distance_canberraword_modular.py

# This example shows how to compute the Canberra Word Distance.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')


parameter_list = [[traindna,testdna,3,0,False],[traindna,testdna,3,0,False]]

def distance_canberraword_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False):
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString
	from shogun.Distance import CanberraWordDistance
	
	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	distance=CanberraWordDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test

if __name__=='__main__':
	print('CanberraWordDistance')
	distance_canberraword_modular(*parameter_list[0])

../examples/documented/python_modular/distance_chebyshew_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance (maximum of absolute feature dimension differences) matrix is 
# computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# (maximum of absolute feature dimension differences) matrix between these 
# two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CChebyshewMetric.html.
# 
# Obviously, using the Chebyshew distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_chebyshew_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import ChebyshewMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=ChebyshewMetric(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('ChebyshewMetric')
	distance_chebyshew_modular(*parameter_list[0])

../examples/documented/python_modular/distance_chisquare_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CChiSquareDistance.html.
# 
# Obviously, using the ChiSquare distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,],[traindat,testdat]]

def distance_chisquare_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import ChiSquareDistance
	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=ChiSquareDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test

if __name__=='__main__':
	print('ChiSquareDistance')
	distance_chisquare_modular(*parameter_list[0])

../examples/documented/python_modular/distance_cosine_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CCosineDistance.html.
# 
# Obviously, using the Cosine distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_cosine_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import CosineDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=CosineDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('CosineDistance')
	distance_cosine_modular(*parameter_list[0])

../examples/documented/python_modular/distance_director_euclidian_modular.py

#!/usr/bin/env python
import numpy
from shogun.Features import RealFeatures
try:
	from shogun.Distance import DirectorDistance
except ImportError:
	print "recompile shogun with --enable-swig-directors"
	import sys
	sys.exit(0)


class DirectorEuclideanDistance(DirectorDistance):
	def __init__(self):
		DirectorDistance.__init__(self, True)
	def distance_function(self, idx_a, idx_b):
		seq1 = self.get_lhs().get_feature_vector(idx_a)
               	seq2 = self.get_rhs().get_feature_vector(idx_b)
		return numpy.linalg.norm(seq1-seq2)

traindat = numpy.random.random_sample((10,10))
testdat = numpy.random.random_sample((10,10))
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]]

def distance_director_euclidean_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):

	from shogun.Distance import EuclideanDistance
	from modshogun import Time

	feats_train=RealFeatures(fm_train_real)
	feats_train.io.set_loglevel(0)
	feats_train.parallel.set_num_threads(1)
	feats_test=RealFeatures(fm_test_real)

	distance=EuclideanDistance()
	distance.init(feats_train, feats_test)

	ddistance=DirectorEuclideanDistance()
	ddistance.init(feats_train, feats_test)

	print  "dm_train"
	t=Time()
	dm_train=distance.get_distance_matrix()
	t1=t.cur_time_diff(True)

	print  "ddm_train"
	t=Time()
	ddm_train=ddistance.get_distance_matrix()
	t2=t.cur_time_diff(True)	

	print "dm_train", dm_train
	print "ddm_train", ddm_train

	return dm_train, ddm_train

if __name__=='__main__':
	print('DirectorEuclideanDistance')
	distance_director_euclidean_modular(*parameter_list[0])

../examples/documented/python_modular/distance_euclidian_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CEuclidianDistance.html.
# 
# Obviously, using the Euclidian distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_euclidean_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=EuclideanDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('EuclideanDistance')
	distance_euclidean_modular(*parameter_list[0])

../examples/documented/python_modular/distance_geodesic_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a 
# pairwise distance (shortest path on a sphere) matrix is computed 
# by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# (shortest path on a sphere) matrix between these two data sets is 
# computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CGeodesicMetric.html.
# 
# Obviously, using the Geodesic distance is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_geodesic_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import GeodesicMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=GeodesicMetric(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test


if __name__=='__main__':
	print('GeodesicMetric')
	distance_geodesic_modular(*parameter_list[0])

../examples/documented/python_modular/distance_hammingword_modular.py

# This example shows how to compute the Hamming Word Distance for string features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
testdat = lm.load_labels('../data/fm_test_real.dat')

parameter_list = [[traindna,testdna,testdat,4,0,False,False],
		[traindna,testdna,testdat,3,0,False,False]]

def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna,
		fm_test_real=testdat,order=3,gap=0,reverse=False,use_sign=False):

	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString
	from shogun.Distance import HammingWordDistance

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	distance=HammingWordDistance(feats_train, feats_train, use_sign)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test

if __name__=='__main__':
	print('HammingWordDistance')
	distance_hammingword_modular(*parameter_list[0])

../examples/documented/python_modular/distance_jensen_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance (divergence measure based on the Kullback-Leibler divergence) matrix 
# is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# (divergence measure based on the Kullback-Leibler divergence) matrix between 
# these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CJensenMetric.html.
# 
# Obviously, using the Jensen-Shannon distance/divergence is not limited to 
# this showcase example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_jensen_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import JensenMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=JensenMetric(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return distance,dm_train,dm_test

if __name__=='__main__':
	print('JensenMetric')
	distance_jensen_modular(*parameter_list[0])

../examples/documented/python_modular/distance_mahalanobis_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat  = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat, testdat]]

def distance_mahalanobis_modular (fm_train_real = traindat, fm_test_real = testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import MahalanobisDistance

	feats_train = RealFeatures(fm_train_real)
	feats_test  = RealFeatures(fm_test_real)

	distance = MahalanobisDistance(feats_train, feats_train)
	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('MahalanobisDistance')
	distance_mahalanobis_modular(*parameter_list[0])

../examples/documented/python_modular/distance_manhatten_modular.py

# This example shows how to compute the Manhatten Distance.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_manhatten_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import ManhattanMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=ManhattanMetric(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('ManhattanMetric')
	distance_manhatten_modular(*parameter_list[0])

../examples/documented/python_modular/distance_manhattenword_modular.py

# This example shows how to compute the Manahattan Distance for string features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindna,testdna,testdat,3,0,False],[traindna,testdna,testdat,4,0,False]]

def distance_manhattenword_modular (fm_train_dna=traindna ,fm_test_dna=testdna,fm_test_real=testdat,order=3,gap=0,reverse=False):

	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString
	from shogun.Distance import ManhattanWordDistance

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	distance=ManhattanWordDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()
	return dm_train,dm_test

if __name__=='__main__':
	print('ManhattanWordDistance')
	distance_manhattenword_modular(*parameter_list[0])

../examples/documented/python_modular/distance_minkowski_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) and norm 'k' controls the processing of the given data points, 
# where a pairwise distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CMinkowskiMetric.html.
# 
# Obviously, using the Minkowski metric is not limited to this showcase
# example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,3],[traindat,testdat,4]]

def distance_minkowski_modular (fm_train_real=traindat,fm_test_real=testdat,k=3):

	from shogun.Features import RealFeatures
	from shogun.Distance import MinkowskiMetric

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=MinkowskiMetric(feats_train, feats_train, k)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('MinkowskiMetric')
	distance_minkowski_modular(*parameter_list[0])

../examples/documented/python_modular/distance_normsquared_modular.py

# In this example an squared euclidian distance is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_normsquared_modular (fm_train_real=traindat,fm_test_real=testdat):

	from shogun.Features import RealFeatures
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=EuclideanDistance(feats_train, feats_train)
	distance.set_disable_sqrt(True)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':

	print('EuclideanDistance - NormSquared')
	distance_normsquared_modular(*parameter_list[0])

../examples/documented/python_modular/distance_sparseeuclidean_modular.py

# In this example a sparse euclidean distance is computed for sparse toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_sparseeuclidean_modular (fm_train_real=traindat,fm_test_real=testdat):
	from shogun.Features import RealFeatures, SparseRealFeatures
	from shogun.Distance import SparseEuclideanDistance

	realfeat=RealFeatures(fm_train_real)
	feats_train=SparseRealFeatures()
	feats_train.obtain_from_simple(realfeat)
	realfeat=RealFeatures(fm_test_real)
	feats_test=SparseRealFeatures()
	feats_test.obtain_from_simple(realfeat)

	distance=SparseEuclideanDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('SparseEuclideanDistance')
	distance_sparseeuclidean_modular(*parameter_list[0])

../examples/documented/python_modular/distance_tanimoto_modular.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values from different 
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
# 
# The distance initialized by two data sets (the same data set as shown in the 
# first call) controls the processing of the given data points, where a pairwise 
# distance (extended Jaccard coefficient) matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# The method call 'init'* binds the given data sets, where a pairwise distance 
# (extended Jaccard coefficient) matrix between these two data sets is computed 
# by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
# 
# *Note that the previous computed distance matrix can no longer be 
# reaccessed by 'get_distance_matrix'.
# 
# For more details see doc/classshogun_1_1CTanimotoDistance.html.
# 
# Obviously, using the Tanimoto distance/coefficient is not limited to
# this showcase example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def distance_tanimoto_modular (fm_train_real=traindat,fm_test_real=testdat):
	from shogun.Features import RealFeatures
	from shogun.Distance import TanimotoDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance=TanimotoDistance(feats_train, feats_train)

	dm_train=distance.get_distance_matrix()
	distance.init(feats_train, feats_test)
	dm_test=distance.get_distance_matrix()

	return distance,dm_train,dm_test

if __name__=='__main__':
	print('TanimotoDistance')
	distance_tanimoto_modular(*parameter_list[0])

Distribution

../examples/documented/python_modular/distribution_histogram_modular.py

# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')

parameter_list = [[traindna,3,0,False],[traindna,4,0,False]]

def distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False):
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
	from shogun.Distribution import Histogram

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_dna)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	histo=Histogram(feats)
	histo.train()

	histo.get_histogram()

	num_examples=feats.get_num_vectors()
	num_param=histo.get_num_model_parameters()
	#for i in xrange(num_examples):
	#	for j in xrange(num_param):
	#		histo.get_log_derivative(j, i)

	out_likelihood = histo.get_log_likelihood()
	out_sample = histo.get_log_likelihood_sample()
	return histo,out_sample,out_likelihood
###########################################################################
# call functions
###########################################################################

if __name__=='__main__':
	print('Histogram')
	distribution_histogram_modular(*parameter_list[0])

../examples/documented/python_modular/distribution_hmm_modular.py

# In this example a hidden markov model with 3 states and 6 transitions is trained
# on a string data set. After calling the constructor of the HMM class specifying
# the number of states and transitions the model is trained. Via the Baum-Welch
# algorithm the optimal transition and emission probabilities are estimated. The
# best path, i.e. the path with highest probability given the model can then be
# calculated using get_best_path_state.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
data=lm.load_cubes('../data/fm_train_cube.dat')

parameter_list=[[data, 1, 64, 1e-5, 2, 0, False, 5], [data, 3, 6, 1e-1, 1, 0, False, 2]]

def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples):
	from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE
	from shogun.Distribution import HMM, BW_NORMAL

	charfeat=StringCharFeatures(CUBE)
	charfeat.set_features(fm_cube)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	hmm=HMM(feats, N, M, pseudo)
	hmm.train()
	hmm.baum_welch_viterbi_train(BW_NORMAL)

	num_examples=feats.get_num_vectors()
	num_param=hmm.get_num_model_parameters()
	for i in range(num_examples):
		for j in range(num_param):
			hmm.get_log_derivative(j, i)

	best_path=0
	best_path_state=0
	for i in range(num_examples):
		best_path+=hmm.best_path(i)
		for j in range(N):
			best_path_state+=hmm.get_best_path_state(i, j)

	lik_example = hmm.get_log_likelihood()
	lik_sample = hmm.get_log_likelihood_sample()

	return lik_example, lik_sample, hmm

###########################################################################
# call functions
###########################################################################

if __name__=='__main__':
	print('HMM')
	distribution_hmm_modular(*parameter_list[0])

../examples/documented/python_modular/distribution_linearhmm_modular.py

# Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due to
# the structure of the Markov chain it is very similar to a HMM with just one
# chain of connected hidden states - that is why we termed this linear HMM.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')

parameter_list = [[traindna,3,0,False],[traindna,4,0,False]]

def distribution_linearhmm_modular (fm_dna=traindna,order=3,gap=0,reverse=False):

	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
	from shogun.Distribution import LinearHMM

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_dna)
	feats=StringWordFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, gap, reverse)

	hmm=LinearHMM(feats)
	hmm.train()

	hmm.get_transition_probs()

	num_examples=feats.get_num_vectors()
	num_param=hmm.get_num_model_parameters()
	for i in range(num_examples):
		for j in range(num_param):
			hmm.get_log_derivative(j, i)

	out_likelihood = hmm.get_log_likelihood()
	out_sample = hmm.get_log_likelihood_sample()

	return hmm,out_likelihood ,out_sample
###########################################################################
# call functions
###########################################################################

if __name__=='__main__':
	distribution_linearhmm_modular(*parameter_list[0])
	print('LinearHMM')

../examples/documented/python_modular/distribution_ppwm_modular.py

# In this example usage of the Positional PWM is shown

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')

parameter_list = [[traindna,3],[traindna,4]]

def distribution_ppwm_modular (fm_dna=traindna, order=3):
	from shogun.Features import StringByteFeatures, StringCharFeatures, DNA
	from shogun.Distribution import PositionalPWM

	from numpy import array,e,log,exp

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_dna)
	feats=StringByteFeatures(charfeat.get_alphabet())
	feats.obtain_from_char(charfeat, order-1, order, 0, False)

	L=20
	k=3
	sigma = 1;
	mu = 4

	ppwm=PositionalPWM()
	ppwm.set_sigma(sigma)
	ppwm.set_mean(mu)
	pwm=array([[0.0, 0.5, 0.1, 1.0],
               [0.0, 0.5, 0.5, 0.0],
               [1.0, 0.0, 0.4, 0.0],
               [0.0, 0.0, 0.0, 0.0]]);
	pwm=array([[0.01,0.09,0.1],[0.09,0.01,0.1],[0.85,0.4,0.1],[0.05,0.5,0.7]])



	ppwm.set_pwm(log(pwm))
	#print(ppwm.get_pwm())
	ppwm.compute_w(L)
	w=ppwm.get_w()
	#print(w)
	#from pylab import *
	#figure(1)
	#pcolor(exp(w))
	#pcolor(w)
	#colorbar()

	#figure(2)
	ppwm.compute_scoring(1)
	u=ppwm.get_scoring(0)
	#pcolor(exp(u))
	#show()

	#ppwm=PositionalPWM(feats)
	#ppwm.train()

	#out_likelihood = histo.get_log_likelihood()
	#out_sample = histo.get_log_likelihood_sample()
	return w,u
###########################################################################
# call functions
###########################################################################

if __name__=='__main__':
	print('PositionalPWM')
	distribution_ppwm_modular(*parameter_list[0])

Evaluation

../examples/documented/python_modular/evaluation_clustering.py

# Example on how to evaluate the clustering performance (given ground-truth)

#!/usr/bin/env python
def get_dataset():
	from os.path import exists
	filename = "../data/optdigits.tes"
	if exists(filename):
		return open(filename)
	else:
		#print("Retrieving data...")
		try:
			from urllib2 import urlopen
		except ImportError:
			from urllib.request import urlopen
		return urlopen("http://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes")

def prepare_data():
	from numpy import loadtxt
	stream = get_dataset()
	#print("Loading data...")
	data = loadtxt(stream, delimiter=',')
	fea = data[:, :-1]
	gnd = data[:, -1]
	return (fea.T, gnd)

(fea, gnd_raw) = prepare_data()
parameter_list = [[fea, gnd_raw, 10]]


def run_clustering(data, k):
	from shogun.Clustering import KMeans
	from shogun.Mathematics import Math_init_random
	from shogun.Distance import EuclideanDistance
	from shogun.Features import RealFeatures

	Math_init_random(42)
	fea = RealFeatures(data)
	distance = EuclideanDistance(fea, fea)
	kmeans=KMeans(k, distance)

	#print("Running clustering...")
	kmeans.train()

	return kmeans.get_cluster_centers()

def assign_labels(data, centroids, ncenters):
	from shogun.Distance import EuclideanDistance
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import KNN
	from numpy import arange

	labels = MulticlassLabels(arange(0.,ncenters))
	fea = RealFeatures(data)
	fea_centroids = RealFeatures(centroids)
	distance = EuclideanDistance(fea_centroids, fea_centroids)
	knn = KNN(1, distance, labels)
	knn.train()
	return knn.apply(fea)

def evaluation_clustering (features=fea, ground_truth=gnd_raw, ncenters=10):
	from shogun.Evaluation import ClusteringAccuracy, ClusteringMutualInformation
	from shogun.Features import MulticlassLabels
	centroids = run_clustering(features, ncenters)
	gnd_hat = assign_labels(features, centroids, ncenters)
	gnd = MulticlassLabels(ground_truth)

	AccuracyEval = ClusteringAccuracy()
	AccuracyEval.best_map(gnd_hat, gnd)

	accuracy = AccuracyEval.evaluate(gnd_hat, gnd)
	#print(('Clustering accuracy = %.4f' % accuracy))

	MIEval = ClusteringMutualInformation()
	mutual_info = MIEval.evaluate(gnd_hat, gnd)
	#print(('Clustering mutual information = %.4f' % mutual_info))

	return gnd, accuracy, mutual_info

if __name__ == '__main__':
	print('Evaluation Clustering')
	evaluation_clustering(*parameter_list[0])

../examples/documented/python_modular/evaluation_contingencytableevaluation_modular.py

# In this example various (accuracy, error rate, ..) measures are being computed
# for the pair of ground truth toy data and random data. 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

ground_truth = lm.load_labels('../data/label_train_twoclass.dat')
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth,predicted]]

def evaluation_contingencytableevaluation_modular (ground_truth, predicted):
	from shogun.Features import BinaryLabels
	from shogun.Evaluation import ContingencyTableEvaluation
	from shogun.Evaluation import AccuracyMeasure,ErrorRateMeasure,BALMeasure
	from shogun.Evaluation import WRACCMeasure,F1Measure,CrossCorrelationMeasure
	from shogun.Evaluation import RecallMeasure,PrecisionMeasure,SpecificityMeasure

	ground_truth_labels = BinaryLabels(ground_truth)
	predicted_labels = BinaryLabels(predicted)
	
	base_evaluator = ContingencyTableEvaluation()
	base_evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = AccuracyMeasure()
	accuracy = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = ErrorRateMeasure()
	errorrate = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = BALMeasure()
	bal = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = WRACCMeasure()
	wracc = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = F1Measure()
	f1 = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = CrossCorrelationMeasure()
	crosscorrelation = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = RecallMeasure()
	recall = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = PrecisionMeasure()
	precision = evaluator.evaluate(predicted_labels,ground_truth_labels)

	evaluator = SpecificityMeasure()
	specificity = evaluator.evaluate(predicted_labels,ground_truth_labels)

	return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity


if __name__=='__main__':
	print('EvaluationContingencyTableEvaluation')
	evaluation_contingencytableevaluation_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_cross_validation_classification.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy.random import randn
from numpy import *

# generate some overlapping training vectors
num_vectors=100
vec_distance=1
traindat=concatenate((randn(2,num_vectors)-vec_distance,
	randn(2,num_vectors)+vec_distance), axis=1)
label_traindat=concatenate((-ones(num_vectors), ones(num_vectors)));

parameter_list = [[traindat,label_traindat]]

def evaluation_cross_validation_classification (traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # training data
    features=RealFeatures(traindat)
    labels=BinaryLabels(label_traindat)

    # classifier
    classifier=LibLinear(L2R_L2LOSS_SVC)

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation=CrossValidation(classifier, features, labels,
	    splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)
	
    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)
	
    # perform cross-validation and print(results)
    result=cross_validation.evaluate()
    #print("mean:", result.mean)
    #if result.has_conf_int:
    #    print("[", result.conf_int_low, ",", result.conf_int_up, "] with alpha=", result.conf_int_alpha)

if __name__=='__main__':
	print('Evaluation CrossValidationClassification')
	evaluation_cross_validation_classification(*parameter_list[0])

../examples/documented/python_modular/evaluation_cross_validation_mkl_weight_storage.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy.random import randn
from numpy import *

# generate some overlapping training vectors
num_vectors=5
vec_distance=1
traindat=concatenate((randn(2,num_vectors)-vec_distance,
	randn(2,num_vectors)+vec_distance), axis=1)
label_traindat=concatenate((-ones(num_vectors), ones(num_vectors)));

parameter_list = [[traindat,label_traindat]]

def evaluation_cross_validation_classification (traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import LibSVM, MKLClassification
    from shogun.Mathematics import Statistics

    # training data, combined features all on same data
    features=RealFeatures(traindat)
    comb_features=CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels=BinaryLabels(label_traindat)
    
    # kernel, different Gaussians combined
    kernel=CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create mkl using libsvm, due to a mem-bug, interleaved is not possible
    svm=MKLClassification(LibSVM());
    svm.set_interleaved_optimization_enabled(False);
    svm.set_kernel(kernel);

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation=CrossValidation(svm, comb_features, labels,
        splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross vlaidation output classes
    cross_validation.add_cross_validation_output(CrossValidationPrintOutput())
    mkl_storage=CrossValidationMKLStorage()
    cross_validation.add_cross_validation_output(mkl_storage)
    cross_validation.set_num_runs(3)
    
    # perform cross-validation
    result=cross_validation.evaluate()

    # print mkl weights
    weights=mkl_storage.get_mkl_weights()
    print "mkl weights during cross--validation"
    print weights
    
    print "mean per kernel"
    print Statistics.matrix_mean(weights, False)
    
    print "variance per kernel"
    print Statistics.matrix_variance(weights, False)
    
    print "std-dev per kernel"
    print Statistics.matrix_std_deviation(weights, False)
    

if __name__=='__main__':
	print('Evaluation CrossValidationClassification')
	evaluation_cross_validation_classification(*parameter_list[0])

../examples/documented/python_modular/evaluation_cross_validation_multiclass_storage.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy.random import randn
from numpy import *

# generate some overlapping training vectors
num_vectors=100
vec_distance=1
traindat=concatenate((randn(2,num_vectors)-vec_distance,
	randn(2,num_vectors)+vec_distance), axis=1)
label_traindat=concatenate((zeros(num_vectors), ones(num_vectors)));

parameter_list = [[traindat,label_traindat]]

def evaluation_cross_validation_multiclass_storage (traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import CrossValidationPrintOutput
    from shogun.Evaluation import CrossValidationMKLStorage, CrossValidationMulticlassStorage
    from shogun.Evaluation import MulticlassAccuracy, F1Measure
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.Features import MulticlassLabels
    from shogun.Features import RealFeatures, CombinedFeatures
    from shogun.Kernel import GaussianKernel, CombinedKernel
    from shogun.Classifier import MKLMulticlass
    from shogun.Mathematics import Statistics, MSG_DEBUG

    # training data, combined features all on same data
    features=RealFeatures(traindat)
    comb_features=CombinedFeatures()
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    comb_features.append_feature_obj(features)
    labels=MulticlassLabels(label_traindat)
    
    # kernel, different Gaussians combined
    kernel=CombinedKernel()
    kernel.append_kernel(GaussianKernel(10, 0.1))
    kernel.append_kernel(GaussianKernel(10, 1))
    kernel.append_kernel(GaussianKernel(10, 2))

    # create mkl using libsvm, due to a mem-bug, interleaved is not possible
    svm=MKLMulticlass(1.0,kernel,labels);
    svm.set_kernel(kernel);

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=MulticlassAccuracy()

    # cross-validation instance
    cross_validation=CrossValidation(svm, comb_features, labels,
        splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # append cross vlaidation output classes
    #cross_validation.add_cross_validation_output(CrossValidationPrintOutput())
    #mkl_storage=CrossValidationMKLStorage()
    #cross_validation.add_cross_validation_output(mkl_storage)
    multiclass_storage=CrossValidationMulticlassStorage()
    multiclass_storage.append_binary_evaluation(F1Measure())
    cross_validation.add_cross_validation_output(multiclass_storage)
    cross_validation.set_num_runs(3)
    
    # perform cross-validation
    result=cross_validation.evaluate()

    roc_0_0_0 = multiclass_storage.get_fold_ROC(0,0,0)
    #print roc_0_0_0
    auc_0_0_0 = multiclass_storage.get_fold_evaluation_result(0,0,0,0)
    #print auc_0_0_0
    return roc_0_0_0, auc_0_0_0


if __name__=='__main__':
	print('Evaluation CrossValidationMulticlassStorage')
	evaluation_cross_validation_multiclass_storage(*parameter_list[0])

../examples/documented/python_modular/evaluation_cross_validation_regression.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.8,1e-6],[traindat,testdat,label_traindat,0.9,1e-7]]

def evaluation_cross_validation_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat,width=0.8,tau=1e-6):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels, RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import KernelRidgeRegression

    # training data
    features=RealFeatures(fm_train)
    labels=RegressionLabels(label_train)

    # kernel and predictor
    kernel=GaussianKernel()
    predictor=KernelRidgeRegression(tau, kernel, labels)

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but here, the std x-val is used
    splitting_strategy=CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=MeanSquaredError()

    # cross-validation instance
    cross_validation=CrossValidation(predictor, features, labels,
        splitting_strategy, evaluation_criterium)

    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # (optional) tell machine to precompute kernel matrix. speeds up. may not work
    predictor.data_lock(labels, features)

    # perform cross-validation and print(results)
    result=cross_validation.evaluate()
    #print("mean:", result.mean)
    #if result.has_conf_int:
    #    print("[", result.conf_int_low, ",", result.conf_int_up, "] with alpha=", result.conf_int_alpha)

if __name__=='__main__':
	print('Evaluation CrossValidationClassification')
	evaluation_cross_validation_regression(*parameter_list[0])

../examples/documented/python_modular/evaluation_meansquarederror_modular.py

# In this example a mean squared error (MSE) is being computed 
# for the pair of random vectors of length N.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

N = 100

random.seed(17)
ground_truth = random.randn(N)
predicted = random.randn(N)

parameter_list = [[ground_truth,predicted]]

def evaluation_meansquarederror_modular (ground_truth, predicted):
	from shogun.Features import RegressionLabels
	from shogun.Evaluation import MeanSquaredError

	ground_truth_labels = RegressionLabels(ground_truth)
	predicted_labels = RegressionLabels(predicted)
	
	evaluator = MeanSquaredError()
	mse = evaluator.evaluate(predicted_labels,ground_truth_labels)

	return mse


if __name__=='__main__':
	print('MeanSquaredError')
	evaluation_meansquarederror_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_meansquaredlogerror_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

N = 100

random.seed(17)
ground_truth = abs(random.randn(N))
predicted = abs(random.randn(N))

parameter_list = [[ground_truth,predicted]]

def evaluation_meansquaredlogerror_modular (ground_truth, predicted):
	from shogun.Features import RegressionLabels
	from shogun.Evaluation import MeanSquaredLogError

	ground_truth_labels = RegressionLabels(ground_truth)
	predicted_labels = RegressionLabels(predicted)
	
	evaluator = MeanSquaredLogError()
	mse = evaluator.evaluate(predicted_labels,ground_truth_labels)

	return mse


if __name__=='__main__':
	print('EvaluationMeanSquaredLogError')
	evaluation_meansquaredlogerror_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_multiclassaccuracy_modular.py

# In this example a multiclass accuracy is being computed for toy data labels
# and toy data labels multiplied by two.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

random.seed(17)
ground_truth = lm.load_labels('../data/label_train_multiclass.dat')
predicted = lm.load_labels('../data/label_train_multiclass.dat') * 2

parameter_list = [[ground_truth,predicted]]

def evaluation_multiclassaccuracy_modular (ground_truth, predicted):
	from shogun.Features import MulticlassLabels
	from shogun.Evaluation import MulticlassAccuracy

	ground_truth_labels = MulticlassLabels(ground_truth)
	predicted_labels = MulticlassLabels(predicted)
	
	evaluator = MulticlassAccuracy()
	accuracy = evaluator.evaluate(predicted_labels,ground_truth_labels)
	
	return accuracy


if __name__=='__main__':
	print('MulticlassAccuracy')
	evaluation_multiclassaccuracy_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_multiclassovrevaluation_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

random.seed(17)
from tools.multiclass_shared import prepare_data
[traindat, label_traindat, testdat, label_testdat] = prepare_data(False)

parameter_list = [[traindat, label_traindat, testdat, label_testdat]]

def evaluation_multiclassovrevaluation_modular (traindat, label_traindat, testdat, label_testdat):
	from shogun.Features import MulticlassLabels
	from shogun.Evaluation import MulticlassOVREvaluation,ROCEvaluation
	from modshogun import MulticlassLibLinear,RealFeatures,ContingencyTableEvaluation,ACCURACY

	ground_truth_labels = MulticlassLabels(label_traindat)
	svm = MulticlassLibLinear(1.0,RealFeatures(traindat),MulticlassLabels(label_traindat))
	svm.train()
	predicted_labels = svm.apply()
	
	binary_evaluator = ROCEvaluation()
	evaluator = MulticlassOVREvaluation(binary_evaluator)
	mean_roc = evaluator.evaluate(predicted_labels,ground_truth_labels)
	print mean_roc
	
	binary_evaluator = ContingencyTableEvaluation(ACCURACY)
	evaluator = MulticlassOVREvaluation(binary_evaluator)
	mean_accuracy = evaluator.evaluate(predicted_labels,ground_truth_labels)
	print mean_accuracy

	return mean_roc, mean_accuracy


if __name__=='__main__':
	print('MulticlassOVREvaluation')
	evaluation_multiclassovrevaluation_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_prcevaluation_modular.py

# In this example PRC (Precision-Recall curve) is being computed
# for the pair of ground truth toy labels and random labels.
# PRC curve (as matrix) and auPRC (area under PRC) is returned.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

ground_truth = lm.load_labels('../data/label_train_twoclass.dat')
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth,predicted]]

def evaluation_prcevaluation_modular (ground_truth, predicted):
	from shogun.Features import BinaryLabels
	from shogun.Evaluation import PRCEvaluation

	ground_truth_labels = BinaryLabels(ground_truth)
	predicted_labels = BinaryLabels(predicted)
	
	evaluator = PRCEvaluation()
	evaluator.evaluate(predicted_labels,ground_truth_labels)
	
	return evaluator.get_PRC(), evaluator.get_auPRC()


if __name__=='__main__':
	print('PRCEvaluation')
	evaluation_prcevaluation_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_rocevaluation_modular.py

# In this example ROC (Receiver Operator Characteristic) is being computed
# for the pair of ground truth toy labels and random labels.
# ROC curve (as matrix) and auROC (area under ROC) is returned.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import random
lm=LoadMatrix()

ground_truth = lm.load_labels('../data/label_train_twoclass.dat')
random.seed(17)
predicted = random.randn(len(ground_truth))

parameter_list = [[ground_truth,predicted]]

def evaluation_rocevaluation_modular (ground_truth, predicted):
	from shogun.Features import BinaryLabels
	from shogun.Evaluation import ROCEvaluation

	ground_truth_labels = BinaryLabels(ground_truth)
	predicted_labels = BinaryLabels(predicted)
	
	evaluator = ROCEvaluation()
	evaluator.evaluate(predicted_labels,ground_truth_labels)

	return evaluator.get_ROC(), evaluator.get_auROC()


if __name__=='__main__':
	print('ROCEvaluation')
	evaluation_rocevaluation_modular(*parameter_list[0])

../examples/documented/python_modular/evaluation_thresholds_modular.py

#!/usr/bin/env python
parameter_list = [[1000]]

def evaluation_thresholds_modular (index):
	from modshogun import BinaryLabels, ROCEvaluation
	import numpy
	numpy.random.seed(17)
	output=numpy.arange(-1,1,0.001)
	output=(0.3*output+0.7*(numpy.random.rand(len(output))-0.5))
	label=[-1.0]*(len(output)//2)
	label.extend([1.0]*(len(output)//2))
	label=numpy.array(label)

	pred=BinaryLabels(output)
	truth=BinaryLabels(label)

	evaluator=ROCEvaluation()
	evaluator.evaluate(pred, truth)

	[fp,tp]=evaluator.get_ROC()

	thresh=evaluator.get_thresholds()
	b=thresh[index]

	#print("tpr", numpy.mean(output[label>0]>b), tp[index])
	#print("fpr", numpy.mean(output[label<0]>b), fp[index])

	return tp[index],fp[index],numpy.mean(output[label>0]>b),numpy.mean(output[label<0]>b)

if __name__=='__main__':
	print('Evaluation with Thresholds')
	evaluation_thresholds_modular(*parameter_list[0])

Features

../examples/documented/python_modular/features_binned_dot_modular.py

#!/usr/bin/env python
import numpy

matrix=numpy.array([[-1.0,0,1],[2,3,4],[5,6,7]])
bins=numpy.array([[0.0, 0.0, 0.0],[1.0,1.0,1.0],[2.0,2.0,2.0],[3.0,3.0,3.0],[4.0,4.0,4.0]])


parameter_list = [(matrix,bins)]

def features_binned_dot_modular (matrix, bins):
	from modshogun import RealFeatures, BinnedDotFeatures
	rf=RealFeatures(matrix)

	#print(rf.get_feature_matrix())

	bf=BinnedDotFeatures(rf, bins)
	filled=bf.get_computed_dot_feature_matrix()

	bf.set_fill(False)
	unfilled=bf.get_computed_dot_feature_matrix()

	bf.set_norm_one(True)
	unfilled_normed=bf.get_computed_dot_feature_matrix()

	bf.set_fill(True)
	filled_normed=bf.get_computed_dot_feature_matrix()

	return bf,filled,unfilled,unfilled_normed,filled_normed

if __name__=='__main__':
    print('BinnedDotFeatures')
    features_binned_dot_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_byte_modular.py

#!/usr/bin/env python
import numpy

# create dense matrix A
A=numpy.array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=numpy.uint8)

parameter_list=[[A]]

def features_dense_byte_modular (A):
	from shogun.Features import ByteFeatures

	# create dense features a
	# ... of type Byte
	a=ByteFeatures(A)

	# print(some statistics about a)
	#print(a.get_num_vectors())
	#print(a.get_num_features())

	# get first feature vector and set it
	#print(a.get_feature_vector(0))
	a.set_feature_vector(numpy.array([1,4,0,0,0,9], dtype=numpy.uint8), 0)

	# get matrix
	a_out = a.get_feature_matrix()

	#print(type(a_out), a_out.dtype)
	#print(a_out )
	assert(numpy.all(a_out==A))
	return a_out,a

if __name__=='__main__':
	print('ByteFeatures')
	features_dense_byte_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_longint_modular.py

#!/usr/bin/env python
from shogun.Features import LongIntFeatures
from numpy import array, int64, all

# create dense matrix A
matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64)

parameter_list = [[matrix]]

# ... of type LongInt
def features_dense_longint_modular (A=matrix):
	a=LongIntFeatures(A)
	# get first feature vector and set it

	a.set_feature_vector(array([1,4,0,0,0,9], dtype=int64), 0)

	# get matrix
	a_out = a.get_feature_matrix()

	assert(all(a_out==A))
	return a_out

if __name__=='__main__':
	print('dense_longint')
	features_dense_longint_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_modular.py

#!/usr/bin/env python
from shogun.Features import RealFeatures, LongIntFeatures, ByteFeatures
from numpy import array, float64, int64, uint8, all

# create dense matrices A,B,C

matrixA=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64)
matrixB=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64)
matrixC=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=uint8)

# ... of type Real, LongInt and Byte
parameter_list = [[matrixA,matrixB,matrixC]]

def features_dense_modular (A=matrixA,B=matrixB,C=matrixC):

    a=RealFeatures(A)
    b=LongIntFeatures(B)
    c=ByteFeatures(C)
    
# or 16bit wide ...
#feat1 = f.ShortFeatures(N.zeros((10,5),N.short))
#feat2 = f.WordFeatures(N.zeros((10,5),N.uint16))


# print(some statistics about a)

# get first feature vector and set it

    a.set_feature_vector(array([1,4,0,0,0,9], dtype=float64), 0)

# get matrices
    a_out = a.get_feature_matrix()
    b_out = b.get_feature_matrix()
    c_out = c.get_feature_matrix()

    assert(all(a_out==A))

    assert(all(b_out==B))

    assert(all(c_out==C))
    return a_out,b_out,c_out,a,b,c

if __name__=='__main__':
    print('dense')
    features_dense_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_protocols_modular.py

#!/usr/bin/env python
import numpy
from shogun.Features import RealFeatures
from shogun.Features import LongIntFeatures

from numpy import array, float64, int64

# create dense matrice
data=[[1,2,3],[4,5,6],[7,8,9],[-1,-2,-3]]

parameter_list = [[data]]

def features_dense_protocols_modular (in_data=data):
	m_real=array(in_data, dtype=float64, order='F')
	f_real=RealFeatures(m_real)

	print m_real
	print f_real

	print f_real[-1]
	print f_real[1, 2]
	print f_real[-1:3]
	print f_real[2, 0:2]
	print f_real[0:3, 1]
	print f_real[0:3, 1:2]
	print f_real[:,1]
	print f_real[1,:]

	print m_real[-2]
	f_real[-1]=m_real[-2]
	print f_real[-1]

	print m_real[0, 1]
	f_real[1,2]=m_real[0,1]
	print f_real[1, 2]

	print m_real[0:2]
	f_real[1:3]=m_real[0:2]
	print f_real[1:3]

	print m_real[0, 0:2]
	f_real[2, 0:2]=m_real[0,0:2]
	print f_real[2, 0:2]

	print m_real[0:3, 2]
	f_real[0:3,1]=m_real[0:3, 2]
	print f_real[0:3, 1]

	print m_real[0:3, 0:1]
	f_real[0:3,1:2]=m_real[0:3,0:1]
	print f_real[0:3, 1:2]

	f_real[:,0]=0
	print f_real.get_feature_matrix()

	if numpy.__version__ >= '1.5':
		f_real+=m_real
		f_real*=m_real
		f_real-=m_real
	else:
		print "numpy version >= 1.5 is needed"
		return None

	f_real+=f_real
	f_real*=f_real
	f_real-=f_real

	print f_real
	print f_real.get_feature_matrix()

	try:
		mem_real=memoryview(f_real)
	except NameError:
		print "Python2.7 is needed for memoryview class"
		return None

	ret_real=array(f_real)
	print ret_real

	return f_real[:,0]

if __name__=='__main__':
	print('dense_protocols')
	features_dense_protocols_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_real_modular.py

#!/usr/bin/env python
from shogun.Features import RealFeatures
from numpy import array, float64, all

# create dense matrices A,B,C
matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64)

parameter_list = [[matrix]]

# ... of type LongInt
def features_dense_real_modular (A=matrix):
  
# ... of type Real, LongInt and Byte
    a=RealFeatures(A)

# print(some statistics about a)
#print(a.get_num_vectors())
#print(a.get_num_features())

# get first feature vector and set it
#print(a.get_feature_vector(0))
    a.set_feature_vector(array([1,4,0,0,0,9], dtype=float64), 0)

# get matrix
    a_out = a.get_feature_matrix()

    assert(all(a_out==A))
    return a_out


if __name__=='__main__':
    print('dense_real')
    features_dense_real_modular(*parameter_list[0])

../examples/documented/python_modular/features_dense_zero_copy_modular.py

#!/usr/bin/env python
import numpy
from shogun.Features import RealFeatures
from numpy import array, float64, int64

# create dense matrice
data=[[1,2,3],[4,5,6],[7,8,9],[-1,-2,-3]]

parameter_list = [[data]]

def features_dense_zero_copy_modular (in_data=data):
	feats = None
	if numpy.__version__ >= '1.5':
		feats=numpy.array(in_data, dtype=float64, order='F')		

		a=RealFeatures()
		a.frombuffer(feats, False)

		b=numpy.array(a, copy=False)
		c=numpy.array(a, copy=True)

		d=RealFeatures()
		d.frombuffer(a, False)

		e=RealFeatures()
		e.frombuffer(a, True)

		a[:,0]=0
		print a[0:4]
		print b[0:4]
		print c[0:4]
		print d[0:4]
		print e[0:4]
	else:
		print "numpy version >= 1.5 is needed"

	return feats

if __name__=='__main__':
	print('dense_zero_copy')
	features_dense_zero_copy_modular(*parameter_list[0])

../examples/documented/python_modular/features_director_dot_modular.py

#!/usr/bin/env python
import numpy
try:
	from shogun.Features import DirectorDotFeatures
	from shogun.Library import RealVector
except ImportError:
	print "recompile shogun with --enable-swig-directors"
	import sys
	sys.exit(0)

from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,0.9,1e-3],[traindat,testdat,label_traindat,0.8,1e-2]]

class NumpyFeatures(DirectorDotFeatures):

	# variables
	data=numpy.empty((1,1))
	
	# constructor
	def __init__(self, d):
		DirectorDotFeatures.__init__(self)
		self.data = d
	
	# overloaded methods
	def add_to_dense_sgvec(self, alpha, vec_idx1, vec2, abs):
		if abs:
			vec2+=alpha*numpy.abs(self.data[:,vec_idx1])
		else:
			vec2+=alpha*self.data[:,vec_idx1]

	def dot(self, vec_idx1, df, vec_idx2):
		return numpy.dot(self.data[:,vec_idx1], df.get_computed_dot_feature_vector(vec_idx2))

	def dense_dot_sgvec(self, vec_idx1, vec2):
		return numpy.dot(self.data[:,vec_idx1], vec2[0:vec2.vlen])

	def get_num_vectors(self):
		return self.data.shape[1]

	def get_dim_feature_space(self):
		return self.data.shape[0]

	# operators
#	def __add__(self, other):
#		return NumpyFeatures(self.data+other.data)

#	def __sub__(self, other):
#		return NumpyFeatures(self.data-other.data)

#	def __iadd__(self, other):
#		return NumpyFeatures(self.data+other.data)

#	def __isub__(self, other):
#		return NumpyFeatures(self.data-other.data)

def features_director_dot_modular (fm_train_real, fm_test_real,
		label_train_twoclass, C, epsilon):

	from shogun.Features import RealFeatures, SparseRealFeatures, BinaryLabels
	from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL
	from shogun.Mathematics import Math_init_random
	Math_init_random(17)

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	labels=BinaryLabels(label_train_twoclass)

	dfeats_train=NumpyFeatures(fm_train_real)
	dfeats_test=NumpyFeatures(fm_test_real)
	dlabels=BinaryLabels(label_train_twoclass)

	print feats_train.get_computed_dot_feature_matrix()
	print dfeats_train.get_computed_dot_feature_matrix()

	svm=LibLinear(C, feats_train, labels)
	svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
	svm.set_epsilon(epsilon)
	svm.set_bias_enabled(True)
	svm.train()

	svm.set_features(feats_test)
	svm.apply().get_labels()
	predictions = svm.apply()

	dfeats_train.__disown__()
	dfeats_train.parallel.set_num_threads(1)
	dsvm=LibLinear(C, dfeats_train, dlabels)
	dsvm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL)
	dsvm.set_epsilon(epsilon)
	dsvm.set_bias_enabled(True)
	dsvm.train()

	dfeats_test.__disown__()
	dfeats_test.parallel.set_num_threads(1)
	dsvm.set_features(dfeats_test)
	dsvm.apply().get_labels()
	dpredictions = dsvm.apply()

	return predictions, svm, predictions.get_labels()

if __name__=='__main__':
	print('DirectorLinear')
	features_director_dot_modular(*parameter_list[0])

../examples/documented/python_modular/features_io_modular.py

# This example shows how to read and write plain ascii files, binary files and
# hdf5 datasets.
# 
# For ascii files it shows how to obtain shogun's RealFeatures
# (a simple feature matrix of doubles with 1 column == 1 example, nr_columns ==
#  number of examples) and also sparse features in SVM light format.
# 
# Binary files use some custom native format and datasets can be read/written
# from/to hdf5 files with arbitrary group / path.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
data=lm.load_numbers('../data/fm_train_real.dat')
label=lm.load_numbers('../data/label_train_twoclass.dat')

parameter_list=[[data,label]]

def features_io_modular (fm_train_real, label_train_twoclass):
	import numpy
	from shogun.Features import SparseRealFeatures, RealFeatures, MulticlassLabels
	from shogun.Kernel import GaussianKernel
	from shogun.IO import AsciiFile, BinaryFile, HDF5File

	feats=SparseRealFeatures(fm_train_real)
	feats2=SparseRealFeatures()

	f=BinaryFile("fm_train_sparsereal.bin","w")
	feats.save(f)

	f=AsciiFile("fm_train_sparsereal.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_sparsereal.bin")
	feats2.load(f)

	f=AsciiFile("fm_train_sparsereal.ascii")
	feats2.load(f)

	feats=RealFeatures(fm_train_real)
	feats2=RealFeatures()

	f=BinaryFile("fm_train_real.bin","w")
	feats.save(f)

	f=HDF5File("fm_train_real.h5","w", "/data/doubles")
	feats.save(f)

	f=AsciiFile("fm_train_real.ascii","w")
	feats.save(f)

	f=BinaryFile("fm_train_real.bin")
	feats2.load(f)
	#print("diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

	f=AsciiFile("fm_train_real.ascii")
	feats2.load(f)
	#print("diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())))

	lab=MulticlassLabels(numpy.array([0.0,1.0,2.0,3.0]))
	lab2=MulticlassLabels()
	f=AsciiFile("label_train_twoclass.ascii","w")
	lab.save(f)

	f=BinaryFile("label_train_twoclass.bin","w")
	lab.save(f)

	f=HDF5File("label_train_real.h5","w", "/data/labels")
	lab.save(f)

	f=AsciiFile("label_train_twoclass.ascii")
	lab2.load(f)

	f=BinaryFile("label_train_twoclass.bin")
	lab2.load(f)

	f=HDF5File("fm_train_real.h5","r", "/data/doubles")
	feats2.load(f)
	#print(feats2.get_feature_matrix())
	f=HDF5File("label_train_real.h5","r", "/data/labels")
	lab2.load(f)
	#print(lab2.get_labels())

	#clean up
	import os
	for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii',
			'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii',
			'label_train_real.h5', 'label_train_twoclass.ascii','label_train_twoclass.bin']:
		os.unlink(f)
	return feats, feats2, lab, lab2

if __name__=='__main__':
	print('Features IO')
	features_io_modular(*parameter_list[0])

../examples/documented/python_modular/features_read_svmlight_format_modular.py

# This example demonstrates how to read and write data in the SVMLight Format
# from Shogun.
# 

#!/usr/bin/env python
parameter_list=[['../data/train_sparsereal.light']]

def features_read_svmlight_format_modular (fname):
	import os
	from shogun.Features import SparseRealFeatures

	f=SparseRealFeatures()
	lab=f.load_svmlight_file(fname)
	f.write_svmlight_file('testwrite.light', lab)
	os.unlink('testwrite.light')

if __name__=='__main__':
	print('Reading SVMLIGHT format')
	features_read_svmlight_format_modular(*parameter_list[0])

../examples/documented/python_modular/features_snp_modular.py

# Creates features similar to the feature space of the SNP kernel. Useful when
# working with linear methods.

#!/usr/bin/env python
parameter_list=[['../data/snps.dat']]

def features_snp_modular (fname):
	from shogun.Features import StringByteFeatures, SNPFeatures, SNP

	sf=StringByteFeatures(SNP)
	sf.load_ascii_file(fname, False, SNP, SNP)
	#print(sf.get_features())
	snps=SNPFeatures(sf)
	#print(snps.get_feature_matrix())
	#print(snps.get_minor_base_string())
	#print(snps.get_major_base_string())

if __name__=='__main__':
	print('SNP Features')
	features_snp_modular(*parameter_list[0])

../examples/documented/python_modular/features_sparse_modular.py

# This example demsonstrates how to encode sparse (most entries zero),
# real-valued features in shogun using SparseRealFeatures.

#!/usr/bin/env python
import numpy
# create dense matrix A
A=numpy.array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=numpy.float64)

parameter_list=[[A]]
def features_sparse_modular (A):
	from scipy.sparse import csc_matrix
	from shogun.Features import SparseRealFeatures
	from numpy import array, float64, all

	# sparse representation X of dense matrix A
	# note, will work with types other than float64 too,
	# but requires recent scipy.sparse
	X=csc_matrix(A)
	#print(A)

	# create sparse shogun features from dense matrix A
	a=SparseRealFeatures(A)
	a_out=a.get_full_feature_matrix()
	#print(a_out)
	assert(all(a_out==A))
	#print(a_out)

	# create sparse shogun features from sparse matrix X
	a.set_sparse_feature_matrix(X)
	a_out=a.get_full_feature_matrix()
	#print(a_out)
	assert(all(a_out==A))

	# create sparse shogun features from sparse matrix X
	a=SparseRealFeatures(X)
	a_out=a.get_full_feature_matrix()
	#print(a_out)
	assert(all(a_out==A))

	# obtain (data,row,indptr) csc arrays of sparse shogun features
	z=csc_matrix(a.get_sparse_feature_matrix())
	z_out=z.todense()
	#print(z_out)
	assert(all(z_out==A))

if __name__=='__main__':
	print('Sparse Features')
	features_sparse_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_char_compressed_modular.py

# This example demonstrates how to use compressed strings with shogun.
# We currently support reading and writing compressed files using 
# LZO, GZIP, BZIP2 and LZMA. Furthermore, we demonstrate how to extract
# compressed streams on-the-fly in order to fit data sets into
# memory that would be too large, otherwise.
# 

#!/usr/bin/env python
parameter_list = [['features_string_char_compressed_modular.py']]

def features_string_char_compressed_modular (fname):
	from shogun.Features import StringCharFeatures, StringFileCharFeatures, RAWBYTE
	from shogun.Library import UNCOMPRESSED,SNAPPY,LZO,GZIP,BZIP2,LZMA, MSG_DEBUG
	from shogun.Preprocessor import DecompressCharString

	f=StringFileCharFeatures(fname, RAWBYTE)

	#print("original strings", f.get_features())

	#uncompressed
	f.save_compressed("foo_uncompressed.str", UNCOMPRESSED, 1)
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_uncompressed.str", True)
	#print("uncompressed strings", f2.get_features())
	#print

	# load compressed data and uncompress on load

	#snappy - not stable yet?!
	#f.save_compressed("foo_snappy.str", SNAPPY, 9)
	#f2=StringCharFeatures(RAWBYTE);
	#f2.load_compressed("foo_snappy.str", True)
	#print("snappy strings", f2.get_features())
	#print

	#lzo
	f.save_compressed("foo_lzo.str", LZO, 9)
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_lzo.str", True)
	#print("lzo strings", f2.get_features())
	#print

	##gzip
	f.save_compressed("foo_gzip.str", GZIP, 9)
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_gzip.str", True)
	#print("gzip strings", f2.get_features())
	#print

	#bzip2
	f.save_compressed("foo_bzip2.str", BZIP2, 9)
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_bzip2.str", True)
	#print("bzip2 strings", f2.get_features())
	#print

	#lzma
	f.save_compressed("foo_lzma.str", LZMA, 9)
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_lzma.str", True)
	#print("lzma strings", f2.get_features())
	#print

	# load compressed data and uncompress via preprocessor
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_lzo.str", False)
	f2.add_preprocessor(DecompressCharString(LZO))
	f2.apply_preprocessor()
	#print("lzo strings", f2.get_features())
	#print

	# load compressed data and uncompress on-the-fly via preprocessor
	f2=StringCharFeatures(RAWBYTE);
	f2.load_compressed("foo_lzo.str", False)
	#f2.io.set_loglevel(MSG_DEBUG)
	f2.add_preprocessor(DecompressCharString(LZO))
	f2.enable_on_the_fly_preprocessing()
	#print("lzo strings", f2.get_features())
	#print

	#clean up
	import os
	for f in ['foo_uncompressed.str', 'foo_snappy.str', 'foo_lzo.str', 'foo_gzip.str',
	'foo_bzip2.str', 'foo_lzma.str', 'foo_lzo.str', 'foo_lzo.str']:
		if os.path.exists(f):
			os.unlink(f)

	##########################################################################################
	# some perfectly compressible stuff follows
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################
	##########################################################################################

if __name__=='__main__':
    print('Compressing StringCharFileFeatures')
    features_string_char_compressed_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_char_modular.py

# This example demonstrates how to encode ASCII-strings (255 symbols) in shogun.

#!/usr/bin/env python
strings=['hey','guys','i','am','a','string']

parameter_list=[[strings]]

def features_string_char_modular (strings):
	from shogun.Features import StringCharFeatures, RAWBYTE
	from numpy import array

	#create string features
	f=StringCharFeatures(strings, RAWBYTE)

	#and output several stats
	#print("max string length", f.get_max_vector_length())
	#print("number of strings", f.get_num_vectors())
	#print("length of first string", f.get_vector_length(0))
	#print("string[5]", ''.join(f.get_feature_vector(5)))
	#print("strings", f.get_features())

	#replace string 0
	f.set_feature_vector(array(['t','e','s','t']), 0)

	#print("strings", f.get_features())
	return f.get_features(), f

if __name__=='__main__':
	print('StringCharFeatures')
	features_string_char_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_file_char_modular.py

# This example demonstrates how to load ASCII features from a file into shogun.

#!/usr/bin/env python
parameter_list = [['features_string_file_char_modular.py']]

def features_string_file_char_modular (fname):
	from shogun.Features import StringFileCharFeatures, RAWBYTE
	f = StringFileCharFeatures(fname, RAWBYTE)
	#print("strings", f.get_features())
	return f

if __name__=='__main__':
    print('Compressing StringCharFileFeatures')
    features_string_file_char_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_file_modular.py

# This example demonstrates how to load string features from files.
# We cover two cases: First, we show how to obtain StringCharFeatues
# from a directory of text files (particularly useful in computational biology)
# and second, we demonstrate how to load StringCharFeatues from one (multi-line) file. 
# 

#!/usr/bin/env python
parameter_list=[[".", "features_string_char_modular.py"]]

def features_string_file_modular (directory, fname):
	from shogun.Features import StringCharFeatures, RAWBYTE
	from shogun.IO import AsciiFile

	# load features from directory
	f=StringCharFeatures(RAWBYTE)
	f.load_from_directory(directory)

	#and output several stats
	#print("max string length", f.get_max_vector_length())
	#print("number of strings", f.get_num_vectors())
	#print("length of first string", f.get_vector_length(0))
	#print("str[0,0:3]", f.get_feature(0,0), f.get_feature(0,1), f.get_feature(0,2))
	#print("len(str[0])", f.get_vector_length(0))
	#print("str[0]", f.get_feature_vector(0))

	#or load features from file (one string per line)
	fil=AsciiFile(fname)
	f.load(fil)
	#print(f.get_features())

	#or load fasta file
	#f.load_fasta('fasta.fa')
	#print(f.get_features())
	return f.get_features(), f

if __name__=='__main__':
	print('StringWordFeatures')
	features_string_file_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_hashed_wd_modular.py

# This creates a HashedWDFeatures object, i.e. an approximation to the Weighted
# Degree kernel feature space via hashes. These features can be particularly fast
# in linear SVM solvers.

#!/usr/bin/env python
from shogun.Features import LongIntFeatures
from numpy import array, int64, all

# create dense matrix A
matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64)

parameter_list = [[matrix,3,1,2],[matrix,3,1,2]]

# ... of type LongInt
def features_string_hashed_wd_modular (A=matrix,order=3,start_order=1,hash_bits=2):
    a=LongIntFeatures(A)
    
    from numpy import array, uint8
    from shogun.Features import HashedWDFeatures, StringByteFeatures, RAWDNA
    from shogun.IO import MSG_DEBUG

    x=[array([0,1,2,3,0,1,2,3,3,2,2,1,1],dtype=uint8)]
    from_order=order
    f=StringByteFeatures(RAWDNA)
    #f.io.set_loglevel(MSG_DEBUG)
    f.set_features(x)

    y=HashedWDFeatures(f,start_order,order,from_order,hash_bits)
    fm=y.get_computed_dot_feature_matrix()

    return fm

if __name__=='__main__':
    print('string_hashed_wd')
    features_string_hashed_wd_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_sliding_window_modular.py

# In this example, we demonstrate how to obtain string features
# by using a sliding window in a memory-efficient way. Instead of copying
# the string for each position of the sliding window, we only store a reference
# with respect to the complete string. This is particularly useful, when working
# with genomic data, where storing all explicitly copied strings in memory
# quickly becomes infeasible. In addition to a sliding window (of a particular
# length) over all position, we also support defining a custom position
# list.

#!/usr/bin/env python
# create string features with a single string
s=10*'A' + 10*'C' + 10*'G' + 10*'T'

parameter_list=[[s]]

def features_string_sliding_window_modular (strings):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Library import DynamicIntArray

	f=StringCharFeatures([strings], DNA)

	# slide a window of length 5 over features
	# (memory efficient, does not copy strings)
	f.obtain_by_sliding_window(5,1)
	#print(f.get_num_vectors())
	#print(f.get_vector_length(0))
	#print(f.get_vector_length(1))
	#print(f.get_features())

	# slide a window of length 4 over features
	# (memory efficient, does not copy strings)
	f.obtain_by_sliding_window(4,1)
	#print(f.get_num_vectors())
	#print(f.get_vector_length(0))
	#print(f.get_vector_length(1))
	#print(f.get_features())

	# extract string-windows at position 0,6,16,25 of window size 4
	# (memory efficient, does not copy strings)
	f.set_features([s])
	positions=DynamicIntArray()
	positions.append_element(0)
	positions.append_element(6)
	positions.append_element(16)
	positions.append_element(25)

	f.obtain_by_position_list(4,positions)
	#print(f.get_features())

	# now extract windows of size 8 from same positon list
	f.obtain_by_position_list(8,positions)
	#print(f.get_features())
	return f

if __name__=='__main__':
	print('Sliding Window')
	features_string_sliding_window_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_ulong_modular.py

# This example demonstrates how to encode string
# features efficiently by creating a more compactly encoded
# bit-string from StringCharFeatures.
# For instance, when working with the DNA alphabet {A,T,G,C}
# using 1 char = 1 byte per symbol would be wasteful, as we
# can encode 4 symbols using 2 bits only.
# Here, this is done in junks of 64bit (ulong).

#!/usr/bin/env python

parameter_list = [[0,2,0,False],[0,3,0,False]]

def features_string_ulong_modular (start=0,order=2,gap=0,rev=False):
    
    from shogun.Features import StringCharFeatures, StringUlongFeatures, RAWBYTE
    from numpy import array, uint64
    
#create string features
    cf=StringCharFeatures(['hey','guys','string'], RAWBYTE)
    uf=StringUlongFeatures(RAWBYTE)
    
    uf.obtain_from_char(cf, start,order,gap,rev)
    
#replace string 0
    uf.set_feature_vector(array([1,2,3,4,5], dtype=uint64), 0)
    

    return uf.get_features(),uf.get_feature_vector(2), uf.get_num_vectors()

if __name__=='__main__':
    print('simple_longint')
    features_string_ulong_modular(*parameter_list[0])

../examples/documented/python_modular/features_string_word_modular.py

# This example demonstrates how to encode string
# features efficiently by creating a more compactly encoded
# bit-string from StringCharFeatures.
# For instance, when working with the DNA alphabet {A,T,G,C}
# using 1 char = 1 byte per symbol would be wasteful, as we
# can encode 4 symbols using 2 bits only.
# Here, this is done in junks of 16bit (word).

#!/usr/bin/env python
strings=['hey','guys','string']

parameter_list=[[strings,0,2,0,False]]

def features_string_word_modular (strings, start, order, gap, rev):
	from shogun.Features import StringCharFeatures, StringWordFeatures, RAWBYTE
	from numpy import array, uint16

	#create string features
	cf=StringCharFeatures(strings, RAWBYTE)
	wf=StringWordFeatures(RAWBYTE)

	wf.obtain_from_char(cf, start, order, gap, rev)

	#and output several stats
	#print("max string length", wf.get_max_vector_length())
	#print("number of strings", wf.get_num_vectors())
	#print("length of first string", wf.get_vector_length(0))
	#print("string[2]", wf.get_feature_vector(2))
	#print("strings", wf.get_features())

	#replace string 0
	wf.set_feature_vector(array([1,2,3,4,5], dtype=uint16), 0)

	#print("strings", wf.get_features())
	return wf.get_features(), wf

if __name__=='__main__':
	print('StringWordFeatures')
	features_string_word_modular(*parameter_list[0])

Kernel

../examples/documented/python_modular/kernel_anova_modular.py

# In this example the ANOVA kernel is being computed for toy data.

#!/usr/bin/env python
###########################################################################
# anova kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm=LoadMatrix()

traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
parameter_list = [[traindat,testdat,2,10], [traindat,testdat,5,10]]

def kernel_anova_modular (fm_train_real=traindat,fm_test_real=testdat,cardinality=2, size_cache=10):
	from shogun.Kernel import ANOVAKernel
	from shogun.Features import RealFeatures
	
	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	kernel=ANOVAKernel(feats_train, feats_train, cardinality, size_cache)
        
	for i in range(0,feats_train.get_num_vectors()):
		for j in range(0,feats_train.get_num_vectors()):
			k1 = kernel.compute_rec1(i,j)
			k2 = kernel.compute_rec2(i,j)
			#if abs(k1-k2) > 1e-10:
			#	print("|%s|%s|" % (k1, k2))

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train, km_test, kernel

if __name__=='__main__':
	print('ANOVA')
	kernel_anova_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_auc_modular.py

# This example demonstrates the use of the AUC Kernel.

#!/usr/bin/env python
###########################################################################
# kernel can be used to maximize AUC instead of margin in SVMs 
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm=LoadMatrix()

traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list = [[traindat,testdat,1.7], [traindat,testdat,1.6]]


def kernel_auc_modular (fm_train_real=traindat,label_train_real=testdat,width=1.7):


	from shogun.Kernel import GaussianKernel, AUCKernel
	from shogun.Features import RealFeatures, BinaryLabels

	feats_train=RealFeatures(fm_train_real)

	subkernel=GaussianKernel(feats_train, feats_train, width)

	kernel=AUCKernel(0, subkernel)
	kernel.setup_auc_maximization( BinaryLabels(label_train_real) )
	km_train=kernel.get_kernel_matrix()
	return kernel

if __name__=='__main__':
	print('AUC')
	kernel_auc_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_cauchy_modular.py

# In this example the Cauchy kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 10.0]]

def kernel_cauchy_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import CauchyKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=CauchyKernel(feats_train, feats_train, sigma, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Cauchy')
	kernel_cauchy_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_chi2_modular.py

# This is an example for the initialization of the chi2-kernel on real data, where 
# each column of the matrices corresponds to one training/test example. 

#!/usr/bin/env python
###########################################################################
# chi2 kernel
###########################################################################
from tools.load import LoadMatrix
from numpy import double
lm=LoadMatrix()

traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
parameter_list = [[traindat,testdat,1.4,10], [traindat,testdat,1.5,10]]

def kernel_chi2_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4, size_cache=10):
	from shogun.Kernel import Chi2Kernel
	from shogun.Features import RealFeatures
	
	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	
	kernel=Chi2Kernel(feats_train, feats_train, width, size_cache)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Chi2')
	kernel_chi2_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_circular_modular.py

# In this example the circular kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_circular_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import CircularKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=CircularKernel(feats_train, feats_train, sigma, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Circular')
	kernel_circular_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_combined_custom_poly_modular.py

# In this example the combined kernel of custom kernel and poly kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')
parameter_list= [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]]


def kernel_combined_custom_poly_modular (fm_train_real = traindat,fm_test_real = testdat,fm_label_twoclass=label_traindat):
    from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
    from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
    from shogun.Classifier import LibSVM
   
    kernel = CombinedKernel()
    feats_train = CombinedFeatures()
    
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, tfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))
        
    subkfeats_train = RealFeatures(fm_train_real)
    feats_train.append_feature_obj(subkfeats_train)
    subkernel = PolyKernel(10,2)
    kernel.append_kernel(subkernel)

    kernel.init(feats_train, feats_train)
    
    labels = BinaryLabels(fm_label_twoclass)
    svm = LibSVM(1.0, kernel, labels)
    svm.train()

    kernel = CombinedKernel()
    feats_pred = CombinedFeatures()

    pfeats = RealFeatures(fm_test_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, pfeats)
    K = tkernel.get_kernel_matrix()
    kernel.append_kernel(CustomKernel(K))

    subkfeats_test = RealFeatures(fm_test_real)
    feats_pred.append_feature_obj(subkfeats_test)
    subkernel = PolyKernel(10, 2)
    kernel.append_kernel(subkernel)
    kernel.init(feats_train, feats_pred)

    svm.set_kernel(kernel)
    svm.apply()
    km_train=kernel.get_kernel_matrix()
    return km_train,kernel

if __name__=='__main__':
    kernel_combined_custom_poly_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_combined_modular.py

# This is an example for the initialization of a combined kernel, which is a weighted sum of 
# in this case three kernels on real valued data. The sub-kernel weights are all set to 1. 
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import double
lm=LoadMatrix()

traindat = double(lm.load_numbers('../data/fm_train_real.dat'))
testdat = double(lm.load_numbers('../data/fm_test_real.dat'))
traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')
 
parameter_list = [[traindat,testdat,traindna,testdna],[traindat,testdat,traindna,testdna]]
def kernel_combined_modular (fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ):
	from shogun.Kernel import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
	from shogun.Features import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

	kernel=CombinedKernel()
	feats_train=CombinedFeatures()
	feats_test=CombinedFeatures()

	subkfeats_train=RealFeatures(fm_train_real)
	subkfeats_test=RealFeatures(fm_test_real)
	subkernel=GaussianKernel(10, 1.1)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train=StringCharFeatures(fm_train_dna, DNA)
	subkfeats_test=StringCharFeatures(fm_test_dna, DNA)
	degree=3
	subkernel=FixedDegreeStringKernel(10, degree)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train=StringCharFeatures(fm_train_dna, DNA)
	subkfeats_test=StringCharFeatures(fm_test_dna, DNA)
	subkernel=LocalAlignmentStringKernel(10)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	kernel.init(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Combined')
	kernel_combined_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_comm_ulong_string_modular.py

# This is an example for the initialization of the CommUlongString-kernel. This kernel 
# sums over k-mere matches (k='order'). For efficient computing a preprocessor is used 
# that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mere is counted 
# only once. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat =lm.load_dna('../data/fm_train_dna.dat')
testdat =  lm.load_dna('../data/fm_test_dna.dat')
parameter_list = [[traindat,testdat,3,0,False ],[traindat,testdat,4,0,False]]

def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False):

	from shogun.Kernel import CommUlongStringKernel
	from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
	from shogun.Preprocessor import SortUlongString

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringUlongFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortUlongString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()


	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringUlongFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	use_sign=False

	kernel=CommUlongStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('CommUlongString')
	kernel_comm_ulong_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_comm_word_string_modular.py

# This is an example for the initialization of the CommWordString-kernel (aka
# Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel 
# sums over k-mere matches (k='order'). For efficient computing a preprocessor is used 
# that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mere is counted 
# only once. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
parameter_list = [[traindat,testdat,4,0,False, False],[traindat,testdat,4,0,False,False]]

def kernel_comm_word_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse = False, use_sign = False):
	
	from shogun.Kernel import CommWordStringKernel
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
	from shogun.Preprocessor import SortWordString
	
	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=CommWordStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('CommWordString')
	kernel_comm_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_const_modular.py

# The constant kernel gives a trivial kernel matrix with all entries set to the same value 
# defined by the argument 'c'. 
# 

#!/usr/bin/env python
parameter_list =[[23],[24]]

def kernel_const_modular (c=23):
	from shogun.Features import DummyFeatures
	from shogun.Kernel import ConstKernel

	feats_train=DummyFeatures(10)
	feats_test=DummyFeatures(17)
	
	kernel=ConstKernel(feats_train, feats_train, c)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Const')
	kernel_const_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_custom_modular.py

# A user defined custom kernel is assigned in this example, for which only the lower triangle 
# may be given (set_triangle_kernel_matrix_from_triangle) or 
# a full matrix (set_full_kernel_matrix_from_full), or a full matrix which is then internally stored as a 
# triangle (set_triangle_kernel_matrix_from_full). Labels for the examples are given, a svm is trained and 
# the svm is used to classify the examples. 
# 

#!/usr/bin/env python
from numpy.random import seed
seed(42)

parameter_list=[[7],[8]]

def kernel_custom_modular (dim=7):
	from numpy.random import rand, seed
	from numpy import array, float32
	from shogun.Features import RealFeatures
	from shogun.Kernel import CustomKernel

	seed(17)
	data=rand(dim, dim)
	feats=RealFeatures(data)
	symdata=data+data.T
	lowertriangle=array([symdata[(x,y)] for x in range(symdata.shape[1])
		for y in range(symdata.shape[0]) if y<=x])

	kernel=CustomKernel()

	# once with float64's
	kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
	km_triangletriangle=kernel.get_kernel_matrix()

	kernel.set_triangle_kernel_matrix_from_full(symdata)
	km_fulltriangle=kernel.get_kernel_matrix()

	kernel.set_full_kernel_matrix_from_full(symdata)
	km_fullfull=kernel.get_kernel_matrix()

	# now once with float32's
	data=array(data,dtype=float32)

	kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
	km_triangletriangle=kernel.get_kernel_matrix()

	kernel.set_triangle_kernel_matrix_from_full(symdata)
	km_fulltriangle=kernel.get_kernel_matrix()

	kernel.set_full_kernel_matrix_from_full(symdata)
	km_fullfull=kernel.get_kernel_matrix()
	return km_fullfull,kernel

if __name__=='__main__':
	print('Custom')
	kernel_custom_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_diag_modular.py

# This is an example for the initialization of the diag-kernel. 
# The diag kernel has all kernel matrix entries but those on 
# the main diagonal set to zero. 

#!/usr/bin/env python
parameter_list =[[23],[24]]
def kernel_diag_modular (diag=23):
	from shogun.Features import DummyFeatures
	from shogun.Kernel import DiagKernel

	feats_train=DummyFeatures(10)
	feats_test=DummyFeatures(17)

	kernel=DiagKernel(feats_train, feats_train, diag)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Diag')
	kernel_diag_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_director_linear_modular.py

#!/usr/bin/env python
import numpy
from shogun.Features import RealFeatures
try:
	from shogun.Kernel import DirectorKernel
except ImportError:
	print "recompile shogun with --enable-swig-directors"
	import sys
	sys.exit(0)


class DirectorLinearKernel(DirectorKernel):
	def __init__(self):
		DirectorKernel.__init__(self, True)
	def kernel_function(self, idx_a, idx_b):
		seq1 = self.get_lhs().get_feature_vector(idx_a)
               	seq2 = self.get_rhs().get_feature_vector(idx_b)
		return numpy.dot(seq1, seq2)

traindat = numpy.random.random_sample((10,10))
testdat = numpy.random.random_sample((10,10))
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]]

def kernel_director_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):

	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
	from modshogun import Time

	feats_train=RealFeatures(fm_train_real)
	feats_train.io.set_loglevel(0)
	feats_train.parallel.set_num_threads(1)
	feats_test=RealFeatures(fm_test_real)
	 
	kernel=LinearKernel()
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	dkernel=DirectorLinearKernel()
	dkernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	dkernel.init(feats_train, feats_train)

	print  "km_train"
	t=Time()
	km_train=kernel.get_kernel_matrix()
	t1=t.cur_time_diff(True)

	print  "dkm_train"
	t=Time()
	dkm_train=dkernel.get_kernel_matrix()
	t2=t.cur_time_diff(True)	

	print "km_train", km_train
	print "dkm_train", dkm_train

	return km_train, dkm_train

if __name__=='__main__':
	print('DirectorLinear')
	kernel_director_linear_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_distance_modular.py

# With the distance kernel one can use any of the following distance metrics: 
# BrayCurtisDistance()
# CanberraMetric()
# CanberraWordDistance()
# ChebyshewMetric()
# ChiSquareDistance()
# CosineDistance()
# Distance()
# EuclidianDistance()
# GeodesicMetric()
# HammingWordDistance()
# JensenMetric()
# ManhattanMetric()
# ManhattanWordDistance()
# MinkowskiMetric()
# RealDistance()
# SimpleDistance()
# SparseDistance()
# SparseEuclidianDistance()
# StringDistance()
# TanimotoDistance()
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import double
lm=LoadMatrix()

traindat = double(lm.load_numbers('../data/fm_test_real.dat'))
testdat = double(lm.load_numbers('../data/fm_train_real.dat'))
parameter_list=[[traindat,testdat,1.7],[traindat,testdat,1.8]]

def kernel_distance_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.7):
	from shogun.Kernel import DistanceKernel
	from shogun.Features import RealFeatures
	from shogun.Distance import EuclideanDistance
	
	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance()

	kernel=DistanceKernel(feats_train, feats_test, width, distance)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Distance')
	kernel_distance_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_distantsegments_modular.py

# In this example the distant segments kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,5,5],[traindat,testdat,6,6]]

def kernel_distantsegments_modular (fm_train_dna=traindat,fm_test_dna=testdat,delta=5, theta=5):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import DistantSegmentsKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)
	
	kernel=DistantSegmentsKernel(feats_train, feats_train, 10, delta, theta)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train, km_test, kernel


if __name__=='__main__':
	print('DistantSegments')
	kernel_distantsegments_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_exponential_modular.py

# In this example the exponential kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_exponential_modular (fm_train_real=traindat,fm_test_real=testdat, tau_coef=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import ExponentialKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	distance = EuclideanDistance(feats_train, feats_train)
	
	kernel=ExponentialKernel(feats_train, feats_train, tau_coef, distance, 10)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Exponential')
	kernel_exponential_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_fisher_modular.py

# The class FKFeatures implements Fischer kernel features obtained from
# two Hidden Markov models.
# 
# It was used in
# 
# K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
# discriminative kernel from probabilistic models. Neural Computation,
# 14:2397-2414, 2002.
# 
# which also has the details.
# 
# Note that FK-features are computed on the fly, so to be effective feature
# caching should be enabled.
# 
# It inherits its functionality from CSimpleFeatures, which should be
# consulted for further reference.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
parameter_list = [[traindat,testdat,label_traindat,1,4,1e-1,1,0,False,[1,False,True]],[traindat,testdat,label_traindat,3,4,1e-1,1,0,False,[1,False,True]]]

fm_hmm_pos=[ traindat[i] for i in where([label_traindat==1])[1] ]
fm_hmm_neg=[ traindat[i] for i in where([label_traindat==-1])[1] ]

def kernel_fisher_modular (fm_train_dna=traindat, fm_test_dna=testdat,
		label_train_dna=label_traindat, 
		N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False,
		kargs=[1,False,True]):

	from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
	from shogun.Kernel import PolyKernel
	from shogun.Distribution import HMM, BW_NORMAL#, MSG_DEBUG
	
	# train HMM for positive class
	charfeat=StringCharFeatures(fm_hmm_pos, DNA)
	#charfeat.io.set_loglevel(MSG_DEBUG)
	hmm_pos_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_pos_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)

	# train HMM for negative class
	charfeat=StringCharFeatures(fm_hmm_neg, DNA)
	hmm_neg_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	neg=HMM(hmm_neg_train, N, M, pseudo)
	neg.baum_welch_viterbi_train(BW_NORMAL)

	# Kernel training data
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# Kernel testing data
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# get kernel on training data
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=FKFeatures(10, pos, neg)
	feats_train.set_opt_a(-1) #estimate prior
	kernel=PolyKernel(feats_train, feats_train, *kargs)
	km_train=kernel.get_kernel_matrix()

	# get kernel on testing data
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=FKFeatures(10, pos_clone, neg_clone)
	feats_test.set_a(feats_train.get_a()) #use prior from training data
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print("Fisher Kernel")
	kernel_fisher_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_fixed_degree_string_modular.py

# The FixedDegree String kernel takes as input two strings of same size and counts the number of matches of length d.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindat, testdat,3],[traindat,testdat,4]]

def kernel_fixed_degree_string_modular (fm_train_dna=traindat, fm_test_dna=testdat,degree=3):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import FixedDegreeStringKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=FixedDegreeStringKernel(feats_train, feats_train, degree)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel
if __name__=='__main__':
	print('FixedDegreeString')
	kernel_fixed_degree_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_gaussian_modular.py

# The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.3],[traindat,testdat, 1.4]]

def kernel_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat, width=1.3):
	from shogun.Features import RealFeatures
	from shogun.Kernel import GaussianKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=GaussianKernel(feats_train, feats_train, width)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Gaussian')
	kernel_gaussian_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_gaussian_shift_modular.py

# An experimental kernel inspired by the WeightedDegreePositionStringKernel and the Gaussian kernel.
# The idea is to shift the dimensions of the input vectors against eachother. 'shift_step' is the step 
# size of the shifts and  max_shift is the maximal shift.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat,1.8,2,1],[traindat,testdat,1.9,2,1]]

def kernel_gaussian_shift_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.8,max_shift=2,shift_step=1):
	from shogun.Features import RealFeatures
	from shogun.Kernel import GaussianShiftKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=GaussianShiftKernel(feats_train, feats_train, width, max_shift, shift_step)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('GaussianShift')
	kernel_gaussian_shift_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_histogram_word_string_modular.py

# The HistogramWordString computes the TOP kernel on inhomogeneous Markov Chains.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]]

def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,gap=0,reverse=False):

	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels
	from shogun.Kernel import HistogramWordStringKernel
	from shogun.Classifier import PluginEstimate#, MSG_DEBUG

	reverse = reverse
	charfeat=StringCharFeatures(DNA)
	#charfeat.io.set_loglevel(MSG_DEBUG)
	charfeat.set_features(fm_train_dna)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	pie=PluginEstimate()
	labels=BinaryLabels(label_train_dna)
	pie.set_labels(labels)
	pie.set_features(feats_train)
	pie.train()

	kernel=HistogramWordStringKernel(feats_train, feats_train, pie)
	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	pie.set_features(feats_test)
	pie.apply().get_labels()
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('PluginEstimate w/ HistogramWord')
	kernel_histogram_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_inversemultiquadric_modular.py

# In this example the inverse multiquadic kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_inversemultiquadric_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import InverseMultiQuadricKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=InverseMultiQuadricKernel(feats_train, feats_train, shift_coef, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('InverseMultiquadric')
	kernel_inversemultiquadric_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_io_modular.py

# example on saving a kernel to a file

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.9],[traindat,testdat,1.7]]

def kernel_io_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.9):
	from shogun.Features import RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.IO import AsciiFile, BinaryFile
	
	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)


	kernel=GaussianKernel(feats_train, feats_train, width)
	km_train=kernel.get_kernel_matrix()
	f=AsciiFile("gaussian_train.ascii","w")
	kernel.save(f)
	del f

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	f=AsciiFile("gaussian_test.ascii","w")
	kernel.save(f)
	del f

	#clean up
	import os
	os.unlink("gaussian_test.ascii")
	os.unlink("gaussian_train.ascii")
	
	return km_train, km_test, kernel

if __name__=='__main__':
	print('Gaussian')
	kernel_io_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_linear_byte_modular.py

# This is an example for the initialization of a linear kernel on raw byte
# data. 

#!/usr/bin/env python
###########################################################################
# linear kernel on byte features
###########################################################################
from tools.load import LoadMatrix
from numpy import ubyte
lm=LoadMatrix()

traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat'))
testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat'))

parameter_list=[[traindat,testdat],[traindat,testdat]]

def kernel_linear_byte_modular (fm_train_byte=traindat,fm_test_byte=testdat):
	from shogun.Kernel import LinearKernel
	from shogun.Features import ByteFeatures

	feats_train=ByteFeatures(fm_train_byte)
	feats_test=ByteFeatures(fm_test_byte)

	kernel=LinearKernel(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return kernel

if __name__=='__main__':
	print('LinearByte')
	kernel_linear_byte_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_linear_modular.py

# This is an example for the initialization of a linear kernel on real valued 
# data using scaling factor 1.2. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]]

def kernel_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2):

	from shogun.Features import RealFeatures
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	 
	kernel=LinearKernel()
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('Linear')
	kernel_linear_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_linear_string_modular.py

# This is an example for the initialization of a linear kernel on string data. The 
# strings are all of the same length and consist of the characters 'ACGT' corresponding 
# to the DNA-alphabet. Each column of the matrices of type char corresponds to 
# one training/test example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list=[[traindat,testdat],[traindat,testdat]]

def kernel_linear_string_modular (fm_train_dna=traindat,fm_test_dna=testdat):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import LinearStringKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=LinearStringKernel(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	from tools.load import LoadMatrix
	print('LinearString')
	kernel_linear_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_linear_word_modular.py

# This is an example for the initialization of a linear kernel on word (2byte) 
# data. 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import ushort

lm=LoadMatrix()
traindat = ushort(lm.load_numbers('../data/fm_train_word.dat'))
testdat = ushort(lm.load_numbers('../data/fm_test_word.dat'))

parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.2]]

def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scale=1.2):
	
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer
	from shogun.Features import WordFeatures

	feats_train=WordFeatures(fm_train_word)
	feats_test=WordFeatures(fm_test_word)

	kernel=LinearKernel(feats_train, feats_train)
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return kernel

if __name__=='__main__':
	print('LinearWord')
	kernel_linear_word_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_local_alignment_string_modular.py

# This is an example for the initialization of the local alignment kernel on 
# DNA sequences, where each column of the matrices of type char corresponds to 
# one training/test example. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def kernel_local_alignment_string_modular (fm_train_dna=traindat,fm_test_dna=testdat):

	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import LocalAlignmentStringKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=LocalAlignmentStringKernel(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()
	
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('LocalAlignmentString')
	kernel_local_alignment_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_locality_improved_string_modular.py

#  The LocalityImprovedString kernel is inspired by the polynomial kernel.
# Comparing neighboring characters it puts emphasize on local features.
# 
# It can be defined as
# K({\bf x},{\bf x'})=\left(\sum_{i=0}^{T-1}\left(\sum_{j=-l}^{+l}w_jI_{i+j}({\bf x},{\bf x'})\right)^{d_1}\right)^{d_2},
# where
# I_i({\bf x},{\bf x'})=1 
# if $x_i=x'_i and 0 otherwise.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindat,testdat,5,5,7],[traindat,testdat,5,5,7]]

def kernel_locality_improved_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,length=5,inner_degree=5,outer_degree=7):

	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import LocalityImprovedStringKernel
	
	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=LocalityImprovedStringKernel(
		feats_train, feats_train, length, inner_degree, outer_degree)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('LocalityImprovedString')
	kernel_locality_improved_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_log_modular.py

# In this example the log kernel (logarithm of the distance powered by degree plus one) is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]

def kernel_log_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import LogKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=LogKernel(feats_train, feats_train, degree, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Log')
	kernel_log_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_match_word_string_modular.py

# In this example the match word string kernel is being computed for toy data

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat, 3,1.4,10,3,0,False],[
traindat,testdat, 3,1.4,10,3,0,False]]

def kernel_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, 
degree=3,scale=1.4,size_cache=10,order=3,gap=0,reverse=False):
	from shogun.Kernel import MatchWordStringKernel, AvgDiagKernelNormalizer
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(DNA)
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(DNA)
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	kernel=MatchWordStringKernel(size_cache, degree)
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
	
if __name__=='__main__':
	print('MatchWordString')
	kernel_match_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_multiquadric_modular.py

# In this example the multiquadric kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_multiquadric_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import MultiquadricKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=MultiquadricKernel(feats_train, feats_train, shift_coef, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Multiquadric')
	kernel_multiquadric_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_oligo_string_modular.py

# This is an example initializing the oligo string kernel which takes distances 
# between matching oligos (k-mers) into account via a gaussian. Variable 'k' defines the length 
# of the oligo and variable 'w' the width of the gaussian. The oligo string kernel is 
# implemented for the DNA-alphabet 'ACGT'. 
#  

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,3,1.2,10],[traindat,testdat,4,1.3,10]]

def kernel_oligo_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,k=3,width=1.2,size_cache=10):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import OligoStringKernel

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=OligoStringKernel(size_cache, k, width)
	kernel.init(feats_train, feats_train)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('OligoString')
	kernel_oligo_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_poly_match_string_modular.py

# In this example the poly match string kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,3,False],[traindat,testdat,4,False]]
def kernel_poly_match_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=3,inhomogene=False):
	from shogun.Kernel import PolyMatchStringKernel
	from shogun.Features import StringCharFeatures, DNA

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	feats_test=StringCharFeatures(fm_train_dna, DNA)

	kernel=PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('PolyMatchString')
	kernel_poly_match_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_poly_match_word_string_modular.py

# This is an example for the initialization of the PolyMatchString kernel on string data. 
# The PolyMatchString kernel sums over the matches of two stings of the same length and 
# takes the sum to the power of 'degree'. The strings consist of the characters 'ACGT' corresponding 
# to the DNA-alphabet. Each column of the matrices of type char corresponds to 
# one training/test example.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,2,True,3,0,False],[traindat,testdat,2,True,3,0,False]]

def kernel_poly_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,
degree=2,inhomogene=True,order=3,gap=0,reverse=False):
	from shogun.Kernel import PolyMatchWordStringKernel
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA



	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(DNA)
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(DNA)
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	kernel=PolyMatchWordStringKernel(feats_train, feats_train, degree, inhomogene)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('PolyMatchWordString')
	kernel_poly_match_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_poly_modular.py

# This example initializes the polynomial kernel with real data. 
# If variable 'inhomogene' is 'True' +1 is added to the scalar product 
# before taking it to the power of 'degree'. If 'use_normalization' is 
# set to 'true' then kernel matrix will be normalized by the square roots
# of the diagonal entries. 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,4,False,True],[traindat,testdat,5,False,True]]

def kernel_poly_modular (fm_train_real=traindat,fm_test_real=testdat,degree=4,inhomogene=False,
	use_normalization=True):
	from shogun.Features import RealFeatures
	from shogun.Kernel import PolyKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=PolyKernel(
		feats_train, feats_train, degree, inhomogene, use_normalization)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
if __name__=='__main__':
	print('Poly')
	kernel_poly_modular (*parameter_list[0])

../examples/documented/python_modular/kernel_power_modular.py

# In this example the power kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]

def kernel_power_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import PowerKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=PowerKernel(feats_train, feats_train, degree, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Power')
	kernel_power_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_rationalquadratic_modular.py

# In this example the rational quadratic kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_rationalquadratic_modular (fm_train_real=traindat,fm_test_real=testdat, shift_coef=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import RationalQuadraticKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=RationalQuadraticKernel(feats_train, feats_train, shift_coef, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('RationalQuadratic')
	kernel_rationalquadratic_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_salzberg_word_string_modular.py

# The SalzbergWordString kernel implements the Salzberg kernel.
# 
# It is described in
# 
# Engineering Support Vector Machine Kernels That Recognize Translation Initiation Sites
# A. Zien, G.Raetsch, S. Mika, B. Schoelkopf, T. Lengauer, K.-R. Mueller
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')

parameter_list = [[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]]
def kernel_salzberg_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,
order=3,gap=0,reverse=False):
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, BinaryLabels
	from shogun.Kernel import SalzbergWordStringKernel
	from shogun.Classifier import PluginEstimate

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	pie=PluginEstimate()
	labels=BinaryLabels(label_train_dna)
	pie.set_labels(labels)
	pie.set_features(feats_train)
	pie.train()

	kernel=SalzbergWordStringKernel(feats_train, feats_train, pie, labels)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	pie.set_features(feats_test)
	pie.apply().get_labels()
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('PluginEstimate w/ SalzbergWord')
	kernel_salzberg_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_sigmoid_modular.py

# The standard Sigmoid kernel computed on dense real valued features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,10,1.2,1.3],[traindat,testdat,10,1.2,1.3]]

def kernel_sigmoid_modular (fm_train_real=traindat,fm_test_real=testdat,size_cache=10,gamma=1.2,coef0=1.3):

	from shogun.Features import RealFeatures
	from shogun.Kernel import SigmoidKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	

	kernel=SigmoidKernel(feats_train, feats_train, size_cache, gamma, coef0)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel
	
if __name__=='__main__':
	print('Sigmoid')
	kernel_sigmoid_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_simple_locality_improved_string_modular.py

# SimpleLocalityImprovedString kernel, is a `simplified' and better performing version of the Locality improved kernel.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,5,5,1],[traindat,testdat,5,3,2]]

def kernel_simple_locality_improved_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,
	length=5,inner_degree=5,outer_degree=1 ):
	
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import SimpleLocalityImprovedStringKernel, MSG_DEBUG

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_test=StringCharFeatures(fm_test_dna, DNA)


	kernel=SimpleLocalityImprovedStringKernel(
		feats_train, feats_train, length, inner_degree, outer_degree)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('SimpleLocalityImprovedString')
	kernel_simple_locality_improved_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_sparse_gaussian_modular.py

# This example demonstrates how to use the Gaussian Kernel with sparse features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]]

def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.1 ):
	from shogun.Features import SparseRealFeatures
	from shogun.Kernel import GaussianKernel

	feats_train=SparseRealFeatures(fm_train_real)
	feats_test=SparseRealFeatures(fm_test_real)


	kernel=GaussianKernel(feats_train, feats_train, width)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('SparseGaussian')
	kernel_sparse_gaussian_modular (*parameter_list[0])

../examples/documented/python_modular/kernel_sparse_linear_modular.py

# This example demonstrates how to use the Linear Kernel with sparse features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]]

def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.1):
	from shogun.Features import SparseRealFeatures
	from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

	feats_train=SparseRealFeatures(fm_train_real)
	feats_test=SparseRealFeatures(fm_test_real)

	kernel=LinearKernel()
	kernel.set_normalizer(AvgDiagKernelNormalizer(scale))
	kernel.init(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('SparseLinear')
	kernel_sparse_linear_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_sparse_poly_modular.py

# This example shows how to use the polynomial kernel with sparse features.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,10,3,True],[traindat,testdat,10,4,True]]

def kernel_sparse_poly_modular (fm_train_real=traindat,fm_test_real=testdat,
		 size_cache=10,degree=3,inhomogene=True ):

	from shogun.Features import SparseRealFeatures
	from shogun.Kernel import PolyKernel

	feats_train=SparseRealFeatures(fm_train_real)
	feats_test=SparseRealFeatures(fm_test_real)



	kernel=PolyKernel(feats_train, feats_train, size_cache, degree,
		inhomogene)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('SparsePoly')
	kernel_sparse_poly_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_spherical_modular.py

# In this example the spherical kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 5.0]]

def kernel_spherical_modular (fm_train_real=traindat,fm_test_real=testdat, sigma=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import MultiquadricKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=MultiquadricKernel(feats_train, feats_train, sigma, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Spherical')
	kernel_spherical_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_spline_modular.py

# In this example the spline kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat],[traindat,testdat]]

def kernel_spline_modular (fm_train_real=traindat,fm_test_real=testdat):
	from shogun.Features import RealFeatures
	from shogun.Kernel import SplineKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=SplineKernel(feats_train, feats_train)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Spline')
	kernel_spline_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_top_modular.py

# The class TOPFeatures implements TOP kernel features obtained from
# two Hidden Markov models.
# 
# It was used in
# 
# K. Tsuda, M. Kawanabe, G. Raetsch, S. Sonnenburg, and K.R. Mueller. A new
# discriminative kernel from probabilistic models. Neural Computation,
# 14:2397-2414, 2002.
# 
# which also has the details.
# 
# Note that TOP-features are computed on the fly, so to be effective feature
# caching should be enabled.
# 
# It inherits its functionality from CSimpleFeatures, which should be
# consulted for further reference.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')
label_traindat = lm.load_labels('../data/label_train_dna.dat')

fm_hmm_pos=[traindat[i] for i in where([label_traindat==1])[1] ]
fm_hmm_neg=[traindat[i] for i in where([label_traindat==-1])[1] ]

parameter_list = [[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True]], \
[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True] ]] 	

def kernel_top_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1,
	order=1,gap=0,reverse=False,kargs=[1, False, True]):
	from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA
	from shogun.Kernel import PolyKernel
	from shogun.Distribution import HMM, BW_NORMAL

	N=1 # toy HMM with 1 state 
	M=4 # 4 observations -> DNA


	# train HMM for positive class
	charfeat=StringCharFeatures(fm_hmm_pos, DNA)
	hmm_pos_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	pos=HMM(hmm_pos_train, N, M, pseudo)
	pos.baum_welch_viterbi_train(BW_NORMAL)

	# train HMM for negative class
	charfeat=StringCharFeatures(fm_hmm_neg, DNA)
	hmm_neg_train=StringWordFeatures(charfeat.get_alphabet())
	hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	neg=HMM(hmm_neg_train, N, M, pseudo)
	neg.baum_welch_viterbi_train(BW_NORMAL)

	# Kernel training data
	charfeat=StringCharFeatures(fm_train_dna, DNA)
	wordfeats_train=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# Kernel testing data
	charfeat=StringCharFeatures(fm_test_dna, DNA)
	wordfeats_test=StringWordFeatures(charfeat.get_alphabet())
	wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	# get kernel on training data
	pos.set_observations(wordfeats_train)
	neg.set_observations(wordfeats_train)
	feats_train=TOPFeatures(10, pos, neg, False, False)
	kernel=PolyKernel(feats_train, feats_train, *kargs)
	km_train=kernel.get_kernel_matrix()

	# get kernel on testing data
	pos_clone=HMM(pos)
	neg_clone=HMM(neg)
	pos_clone.set_observations(wordfeats_test)
	neg_clone.set_observations(wordfeats_test)
	feats_test=TOPFeatures(10, pos_clone, neg_clone, False, False)
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print("TOP Kernel")
	kernel_top_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_tstudent_modular.py

# In this example the t-Student's kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 2.0],[traindat,testdat, 3.0]]

def kernel_tstudent_modular (fm_train_real=traindat,fm_test_real=testdat, degree=2.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import TStudentKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=TStudentKernel(feats_train, feats_train, degree, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('TStudent')
	kernel_tstudent_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_wave_modular.py

# In this example the wave kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.0],[traindat,testdat, 10.0]]

def kernel_wave_modular (fm_train_real=traindat,fm_test_real=testdat, theta=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import WaveKernel
	from shogun.Distance import EuclideanDistance

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)
	
	distance=EuclideanDistance(feats_train, feats_train)

	kernel=WaveKernel(feats_train, feats_train, theta, distance)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Wave')
	kernel_wave_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_wavelet_modular.py

# In this example the wavelet kernel is being computed for toy data.

#!/usr/bin/env python
from tools.load import LoadMatrix
from numpy import where

lm=LoadMatrix()
traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list=[[traindat,testdat, 1.5, 1.0],[traindat,testdat, 1.0, 1.5]]

def kernel_wavelet_modular (fm_train_real=traindat,fm_test_real=testdat, dilation=1.5, translation=1.0):
	from shogun.Features import RealFeatures
	from shogun.Kernel import WaveletKernel

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	kernel=WaveletKernel(feats_train, feats_train, 10, dilation, translation)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel


if __name__=='__main__':
	print('Wavelet')
	kernel_wavelet_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_weighted_comm_word_string_modular.py

# The WeightedCommWordString kernel may be used to compute the weighted
# spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
# length is weighted by some coefficient \f$\beta_k\f$) from strings that have
# been mapped into unsigned 16bit integers.
# 
# These 16bit integers correspond to k-mers. To applicable in this kernel they
# need to be sorted (e.g. via the SortWordString pre-processor).
# 
# It basically uses the algorithm in the unix "comm" command (hence the name)
# to compute:
# 
# k({\bf x},({\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
# 
# where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
# \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
# vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
# 
# Note that this representation is especially tuned to small alphabets
# (like the 2-bit alphabet DNA), for which it enables spectrum kernels
# of order 8.
# 
# For this kernel the linadd speedups are quite efficiently implemented using
# direct maps.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat],[traindat,testdat]]

def kernel_weighted_comm_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,order=3,gap=0,reverse=True ):
	from shogun.Kernel import WeightedCommWordStringKernel
	from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
	from shogun.Preprocessor import SortWordString

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	use_sign=False
	kernel=WeightedCommWordStringKernel(feats_train, feats_train, use_sign)
	km_train=kernel.get_kernel_matrix()

	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('WeightedCommWordString')
	kernel_weighted_comm_word_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_weighted_degree_position_string_modular.py

# The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
# 
# The WD-shift kernel of order d compares two sequences X and
# Y of length L by summing all contributions of k-mer matches of
# lengths k in 1...d, weighted by coefficients beta_k
# allowing for a positional tolerance of up to shift s.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,20],[traindat,testdat,22]]
def kernel_weighted_degree_position_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_test=StringCharFeatures(fm_test_dna, DNA)

	kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree)

	from numpy import zeros,ones,float64,int32
	kernel.set_shifts(10*ones(len(fm_train_dna[0]), dtype=int32))
	kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('WeightedDegreePositionString')
	kernel_weighted_degree_position_string_modular(*parameter_list[0])

../examples/documented/python_modular/kernel_weighted_degree_string_modular.py

# This examples shows how to create a Weighted Degree String Kernel from data
# and how to compute the kernel matrix from the resulting object.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()
traindat = lm.load_dna('../data/fm_train_dna.dat')
testdat = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindat,testdat,3],[traindat,testdat,20]]

def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20):
	from shogun.Features import StringCharFeatures, DNA
	from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG

	feats_train=StringCharFeatures(fm_train_dna, DNA)
	#feats_train.io.set_loglevel(MSG_DEBUG)
	feats_test=StringCharFeatures(fm_test_dna, DNA)
	
	kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree)

	from numpy import arange,double
	weights=arange(1,degree+1,dtype=double)[::-1]/ \
		sum(arange(1,degree+1,dtype=double))
	kernel.set_wd_weights(weights)
	#from numpy import ones,float64,int32
	#kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64))

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

    #this is how to serializate the kernel
	#import pickle
	#pickle.dump(kernel, file('kernel_obj.dump','w'), protocol=2)
	#k=pickle.load(file('kernel_obj.dump','r'))


	return km_train, km_test, kernel


if __name__=='__main__':
	print('WeightedDegreeString')
	kernel_weighted_degree_string_modular(*parameter_list[0])

Library

../examples/documented/python_modular/library_fisher2x3_modular.py

#!/usr/bin/env python
from numpy import *
from shogun.Mathematics import *

x=array([[20.0,15,15],[10,20,20]])
y=array([[21.0,21,18],[19,19,22]])
z=array([[15.0,27,18],[32,5,23]])


parameter_list = [[x,concatenate((x,y,z),1)]]

def library_fisher2x3_modular (table, tables):
	pval=Statistics_fishers_exact_test_for_2x3_table(table)
	pvals=Statistics_fishers_exact_test_for_multiple_2x3_tables(tables)
	return (pval,pvals)

if __name__=='__main__':
	print('Fisher 2x3')
	library_fisher2x3_modular(*parameter_list[0])

../examples/documented/python_modular/library_time.py

#!/usr/bin/env python
import time
from modshogun import Time

parameter_list = [[1]]
def library_time (sleep_secs):
	# measure wall clock time difference
	t=Time()
	time.sleep(1)
	diff=t.cur_time_diff()

	# measure CPU time required
	cpu_diff=t.cur_runtime_diff_sec()

	# return results as integers to enable testing
	return round(diff),round(cpu_diff)

if __name__=='__main__':
	print('Time')
	library_time(*parameter_list[0])

Mathematics

../examples/documented/python_modular/mathematics_sparseinversecovariance_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data,0.0],[data,1.0]]

def mathematics_sparseinversecovariance_modular (data,lc):
	from modshogun import SparseInverseCovariance
	from numpy import dot

	sic = SparseInverseCovariance()
	S = dot(data,data.T)
	Si = sic.estimate(S,lc)

	return Si


if __name__=='__main__':
	print('SparseInverseCovariance')
	mathematics_sparseinversecovariance_modular(*parameter_list[0])

Mkl

../examples/documented/python_modular/mkl_binclass_modular.py

# In this example we show how to perform Multiple Kernel Learning (MKL)
# with the modular interface. First, we create a number of base kernels.
# These kernels can capture different views of the same features, or actually
# consider entirely different features associated with the same example 
# (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample). 
# The base kernels are then subsequently added to a CombinedKernel, which
# contains a weight for each kernel and encapsulates the base kernels
# from the training procedure. When the CombinedKernel between two examples is
# evaluated it computes the corresponding linear combination of kernels according to their weights.
# We then show how to create an MKLClassifier that trains an SVM and learns the optimal
# weighting of kernels (w.r.t. a given norm q) at the same time.
# Finally, the example shows how to classify with a trained MKLClassifier.
# 

#!/usr/bin/env python
from shogun.Features import CombinedFeatures, RealFeatures, BinaryLabels
from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel
from shogun.Classifier import MKLClassification
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]]
#    fm_train_real.shape
#    fm_test_real.shape
#    combined_custom()

def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat):

    ##################################
    # set up and train

    # create some poly train/test matrix
    tfeats = RealFeatures(fm_train_real)
    tkernel = PolyKernel(10,3)
    tkernel.init(tfeats, tfeats)
    K_train = tkernel.get_kernel_matrix()

    pfeats = RealFeatures(fm_test_real)
    tkernel.init(tfeats, pfeats)
    K_test = tkernel.get_kernel_matrix()

    # create combined train features
    feats_train = CombinedFeatures()
    feats_train.append_feature_obj(RealFeatures(fm_train_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_train))
    kernel.append_kernel(PolyKernel(10,2))
    kernel.init(feats_train, feats_train)

    # train mkl
    labels = BinaryLabels(fm_label_twoclass)
    mkl = MKLClassification()

    # which norm to use for MKL
    mkl.set_mkl_norm(1) #2,3

    # set cost (neg, pos)
    mkl.set_C(1, 1)

    # set kernel and labels
    mkl.set_kernel(kernel)
    mkl.set_labels(labels)

    # train
    mkl.train()
    #w=kernel.get_subkernel_weights()
    #kernel.set_subkernel_weights(w)


    ##################################
    # test

    # create combined test features
    feats_pred = CombinedFeatures()
    feats_pred.append_feature_obj(RealFeatures(fm_test_real))

    # and corresponding combined kernel
    kernel = CombinedKernel()
    kernel.append_kernel(CustomKernel(K_test))
    kernel.append_kernel(PolyKernel(10, 2))
    kernel.init(feats_train, feats_pred)

    # and classify
    mkl.set_kernel(kernel)
    mkl.apply()
    return mkl.apply(),kernel

if __name__=='__main__':
    mkl_binclass_modular (*parameter_list[0])

../examples/documented/python_modular/mkl_multiclass_modular.py

# In this example we show how to perform Multiple Kernel Learning (MKL)
# with the modular interface for multi-class classification. 
# First, we create a number of base kernels and features.
# These kernels can capture different views of the same features, or actually
# consider entirely different features associated with the same example 
# (e.g. DNA sequences = strings AND gene expression data = real values of the same tissue sample). 
# The base kernels are then subsequently added to a CombinedKernel, which
# contains a weight for each kernel and encapsulates the base kernels
# from the training procedure. When the CombinedKernel between two examples is
# evaluated it computes the corresponding linear combination of kernels according to their weights.
# We then show how to create an MKLMultiClass classifier that trains an SVM and learns the optimal
# weighting of kernels (w.r.t. a given norm q) at the same time. The main difference to the binary
# classification version of MKL is that we can use more than two values as labels, when training
# the classifier.
# Finally, the example shows how to classify with a trained MKLMultiClass classifier.
# 

#!/usr/bin/env python
from tools.load import LoadMatrix
lm = LoadMatrix()
fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat')

parameter_list=[
		[ fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1, 0.001, 1.5],
		[ fm_train_real, fm_test_real, label_train_multiclass, 5, 1.2, 1e-2, 1, 0.001, 2]]

def mkl_multiclass_modular (fm_train_real, fm_test_real, label_train_multiclass,
	width, C, epsilon, num_threads, mkl_epsilon, mkl_norm):

	from shogun.Features import CombinedFeatures, RealFeatures, MulticlassLabels
	from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel
	from shogun.Classifier import MKLMulticlass

	kernel = CombinedKernel()
	feats_train = CombinedFeatures()
	feats_test = CombinedFeatures()

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = GaussianKernel(10, width)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = LinearKernel()
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)

	subkfeats_train = RealFeatures(fm_train_real)
	subkfeats_test = RealFeatures(fm_test_real)
	subkernel = PolyKernel(10,2)
	feats_train.append_feature_obj(subkfeats_train)
	feats_test.append_feature_obj(subkfeats_test)
	kernel.append_kernel(subkernel)
	
	kernel.init(feats_train, feats_train)

	labels = MulticlassLabels(label_train_multiclass)

	mkl = MKLMulticlass(C, kernel, labels)
	
	mkl.set_epsilon(epsilon);
	mkl.parallel.set_num_threads(num_threads)
	mkl.set_mkl_epsilon(mkl_epsilon)
	mkl.set_mkl_norm(mkl_norm)

	mkl.train()

	kernel.init(feats_train, feats_test)

	out =  mkl.apply().get_labels()
	return out

if __name__ == '__main__':
	print('mkl_multiclass')
	mkl_multiclass_modular(*parameter_list[0])

Modelselection

../examples/documented/python_modular/modelselection_grid_search_kernel.py

#!/usr/bin/env python
#
# This program is free software you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation either version 3 of the License, or
# (at your option) any later version.
#
# Written (C) 2012 Heiko Strathmann
#

from numpy import array
from numpy.random import rand

from shogun.Evaluation import CrossValidation, CrossValidationResult
from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
from shogun.Evaluation import StratifiedCrossValidationSplitting
from shogun.Features import BinaryLabels
from shogun.Features import RealFeatures
from shogun.Kernel import GaussianKernel, PowerKernel
from shogun.Classifier import LibSVM
from shogun.Distance import MinkowskiMetric
from shogun.ModelSelection import GridSearchModelSelection
from shogun.ModelSelection import ModelSelectionParameters, R_EXP, R_LINEAR
from shogun.ModelSelection import ParameterCombination

def create_param_tree():
	root=ModelSelectionParameters()

	c1=ModelSelectionParameters("C1")
	root.append_child(c1)
	c1.build_values(-1.0, 1.0, R_EXP)

	c2=ModelSelectionParameters("C2")
	root.append_child(c2)
	c2.build_values(-1.0, 1.0, R_EXP)

	gaussian_kernel=GaussianKernel()

	# print all parameter available for modelselection
	# Dont worry if yours is not included, simply write to the mailing list
	gaussian_kernel.print_modsel_params()

	param_gaussian_kernel=ModelSelectionParameters("kernel", gaussian_kernel)
	gaussian_kernel_width=ModelSelectionParameters("width")
	gaussian_kernel_width.build_values(-1.0, 1.0, R_EXP, 1.0, 2.0)
	param_gaussian_kernel.append_child(gaussian_kernel_width)
	root.append_child(param_gaussian_kernel)

	power_kernel=PowerKernel()

	# print all parameter available for modelselection
	# Dont worry if yours is not included, simply write to the mailing list
	power_kernel.print_modsel_params()

	param_power_kernel=ModelSelectionParameters("kernel", power_kernel)
	root.append_child(param_power_kernel)

	param_power_kernel_degree=ModelSelectionParameters("degree")
	param_power_kernel_degree.build_values(1.0, 2.0, R_LINEAR)
	param_power_kernel.append_child(param_power_kernel_degree)

	metric=MinkowskiMetric(10)

	# print all parameter available for modelselection
	# Dont worry if yours is not included, simply write to the mailing list
	metric.print_modsel_params()

	param_power_kernel_metric1=ModelSelectionParameters("distance", metric)

	param_power_kernel.append_child(param_power_kernel_metric1)

	param_power_kernel_metric1_k=ModelSelectionParameters("k")
	param_power_kernel_metric1_k.build_values(1.0, 2.0, R_LINEAR)
	param_power_kernel_metric1.append_child(param_power_kernel_metric1_k)

	return root


def modelselection_grid_search_kernel ():
	num_subsets=3
	num_vectors=20
	dim_vectors=3

	# create some (non-sense) data
	matrix=rand(dim_vectors, num_vectors)

	# create num_feautres 2-dimensional vectors
	features=RealFeatures()
	features.set_feature_matrix(matrix)

	# create labels, two classes
	labels=BinaryLabels(num_vectors)
	for i in range(num_vectors):
		labels.set_label(i, 1 if i%2==0 else -1)

	# create svm
	classifier=LibSVM()

	# splitting strategy
	splitting_strategy=StratifiedCrossValidationSplitting(labels, num_subsets)

	# accuracy evaluation
	evaluation_criterion=ContingencyTableEvaluation(ACCURACY)

	# cross validation class for evaluation in model selection
	cross=CrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterion)
	cross.set_num_runs(1)

	# print all parameter available for modelselection
	# Dont worry if yours is not included, simply write to the mailing list
	classifier.print_modsel_params()

	# model parameter selection
	param_tree=create_param_tree()
	param_tree.print_tree()

	grid_search=GridSearchModelSelection(param_tree, cross)

	print_state=True
	best_combination=grid_search.select_model(print_state)
	print("best parameter(s):")
	best_combination.print_tree()

	best_combination.apply_to_machine(classifier)

	# larger number of runs to have tighter confidence intervals
	cross.set_num_runs(10)
	cross.set_conf_int_alpha(0.01)
	result=cross.evaluate()
	print("result: ")
	#result.print_result()

	return 0
	
if __name__=='__main__':
	print('ModelselectionGridSearchKernel')
	modelselection_grid_search_kernel()

../examples/documented/python_modular/modelselection_grid_search_krr_modular.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \
                 [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]]

def modelselection_grid_search_krr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
				       width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels
    from shogun.Features import RealFeatures
    from shogun.Regression import KernelRidgeRegression
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters

    # training data
    features_train=RealFeatures(traindat)
    features_test=RealFeatures(testdat)
    labels=RegressionLabels(label_traindat)

    # labels
    labels=RegressionLabels(label_train)

    # predictor, set tau=0 here, doesnt matter
    predictor=KernelRidgeRegression()

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy=CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=MeanSquaredError()

    # cross-validation instance
    cross_validation=CrossValidation(predictor, features_train, labels,
	    splitting_strategy, evaluation_criterium)
	
    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #predictor.print_modsel_params()

    # build parameter tree to select regularization parameter
    param_tree_root=create_param_tree()

    # model selection instance
    model_selection=GridSearchModelSelection(param_tree_root,
	    cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #print "parameter tree:"
    #param_tree_root.print_tree()
    
    #print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state=False
    
    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and print result
    best_parameters.apply_to_machine(predictor)
    result=cross_validation.evaluate()
    #print "mean:", result.mean
    #if result.has_conf_int:
    #    print "[", result.conf_int_low, ",", result.conf_int_up, "] with alpha=", result.conf_int_alpha

# creates all the parameters to optimize
def create_param_tree():
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP, R_LINEAR
    from shogun.ModelSelection import ParameterCombination
    from shogun.Kernel import GaussianKernel, PolyKernel
    root=ModelSelectionParameters()

    tau=ModelSelectionParameters("tau")
    root.append_child(tau)

    # also R_LINEAR/R_LOG is available as type
    min=-1
    max=1
    type=R_EXP
    step=1.5
    base=2
    tau.build_values(min, max, type, step, base)

    # gaussian kernel with width
    gaussian_kernel=GaussianKernel()
    
    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #gaussian_kernel.print_modsel_params()
    
    param_gaussian_kernel=ModelSelectionParameters("kernel", gaussian_kernel)
    gaussian_kernel_width=ModelSelectionParameters("width");
    gaussian_kernel_width.build_values(5.0, 8.0, R_EXP, 1.0, 2.0)
    param_gaussian_kernel.append_child(gaussian_kernel_width)
    root.append_child(param_gaussian_kernel)

    # polynomial kernel with degree
    poly_kernel=PolyKernel()
    
    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #poly_kernel.print_modsel_params()
    
    param_poly_kernel=ModelSelectionParameters("kernel", poly_kernel)

    root.append_child(param_poly_kernel)

    # note that integers are used here
    param_poly_kernel_degree=ModelSelectionParameters("degree")
    param_poly_kernel_degree.build_values(1, 2, R_LINEAR)
    param_poly_kernel.append_child(param_poly_kernel_degree)

    return root


if __name__=='__main__':
	print('ModelselectionGridSearchKRR')
	modelselection_grid_search_krr_modular(*parameter_list[0])

../examples/documented/python_modular/modelselection_grid_search_liblinear_modular.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2011 Heiko Strathmann
# Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
#

from numpy.random import randn
from numpy import *

# generate some overlapping training vectors
num_vectors=100
vec_distance=1
traindat=concatenate((randn(2,num_vectors)-vec_distance,
	randn(2,num_vectors)+vec_distance), axis=1)
label_traindat=concatenate((-ones(num_vectors), ones(num_vectors)));

parameter_list = [[traindat,label_traindat]]

def modelselection_grid_search_liblinear_modular (traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2 
    param_tree_root=ModelSelectionParameters()
    c1=ModelSelectionParameters("C1");
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP);

    c2=ModelSelectionParameters("C2");
    param_tree_root.append_child(c2);
    c2.build_values(-2.0, 2.0, R_EXP);

    # training data
    features=RealFeatures(traindat)
    labels=BinaryLabels(label_traindat)

    # classifier
    classifier=LibLinear(L2R_L2LOSS_SVC)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #classifier.print_modsel_params()

    # splitting strategy for cross-validation
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium=ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation=CrossValidation(classifier, features, labels,
                                     splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # model selection instance
    model_selection=GridSearchModelSelection(param_tree_root,
	    cross_validation) 

    # perform model selection with selected methods
    #print "performing model selection of"
    #param_tree_root.print_tree()
    best_parameters=model_selection.select_model()

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and print result
    best_parameters.apply_to_machine(classifier)
    result=cross_validation.evaluate()
    #result.print_result()

if __name__=='__main__':
    print('ModelSelectionGridSearchLibLinear')
    modelselection_grid_search_liblinear_modular(*parameter_list[0])

../examples/documented/python_modular/modelselection_grid_search_libsvr_modular.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2012 Heiko Strathmann
# Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
#

from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \
                 [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]]

def modelselection_grid_search_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
				       width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import MeanSquaredError
    from shogun.Evaluation import CrossValidationSplitting
    from shogun.Features import RegressionLabels
    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel
    from shogun.Regression import LibSVR
    from shogun.ModelSelection import GridSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination

    # training data
    features_train=RealFeatures(traindat)
    labels=RegressionLabels(label_traindat)

    # kernel
    kernel=GaussianKernel(features_train, features_train, width)
    
    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #kernel.print_modsel_params()
    
    labels=RegressionLabels(label_train)

    # predictor
    predictor=LibSVR(C, tube_epsilon, kernel, labels)
    predictor.set_epsilon(epsilon)

    # splitting strategy for 5 fold cross-validation (for classification its better
    # to use "StratifiedCrossValidation", but the standard
    # "StratifiedCrossValidationSplitting" is also available
    splitting_strategy=CrossValidationSplitting(labels, 5)

    # evaluation method
    evaluation_criterium=MeanSquaredError()

    # cross-validation instance
    cross_validation=CrossValidation(predictor, features_train, labels,
	    splitting_strategy, evaluation_criterium)
	
    # (optional) repeat x-val 10 times
    cross_validation.set_num_runs(10)

    # (optional) request 95% confidence intervals for results (not actually needed
    # for this toy example)
    cross_validation.set_conf_int_alpha(0.05)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #predictor.print_modsel_params()

    # build parameter tree to select C1 and C2 
    param_tree_root=ModelSelectionParameters()
    c1=ModelSelectionParameters("C1");
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP);

    c2=ModelSelectionParameters("C2");
    param_tree_root.append_child(c2);
    c2.build_values(-2.0, 2.0, R_EXP);

    # model selection instance
    model_selection=GridSearchModelSelection(param_tree_root,
	    cross_validation)

    # perform model selection with selected methods
    #print "performing model selection of"
    #print "parameter tree"
    #param_tree_root.print_tree()
    
    #print "starting model selection"
    # print the current parameter combination, if no parameter nothing is printed
    print_state=False
    # lock data before since model selection will not change the kernel matrix
    # (use with care) This avoids that the kernel matrix is recomputed in every
    # iteration of the model search
    predictor.data_lock(labels, features_train)
    best_parameters=model_selection.select_model(print_state)

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and print result
    best_parameters.apply_to_machine(predictor)
    result=cross_validation.evaluate()
    #print "mean:", result.mean
    #if result.has_conf_int:
    #    print "[", result.conf_int_low, ",", result.conf_int_up, "] with alpha=", result.conf_int_alpha

if __name__=='__main__':
	print('ModelselectionGridSearchLibSVR')
	modelselection_grid_search_libsvr_modular(*parameter_list[0])

../examples/documented/python_modular/modelselection_parameter_tree_modular.py

# In this example a complex model parameters selection tree 
# is being constructed

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2011-2012 Heiko Strathmann
# Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
#

parameter_list=[[None]]

def modelselection_parameter_tree_modular (dummy):
    from shogun.ModelSelection import ParameterCombination
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP, R_LINEAR
    from shogun.Kernel import PowerKernel
    from shogun.Kernel import GaussianKernel
    from shogun.Kernel import DistantSegmentsKernel
    from shogun.Distance import MinkowskiMetric

    root=ModelSelectionParameters()

    combinations=root.get_combinations()
    combinations.get_num_elements()

    c=ModelSelectionParameters('C');
    root.append_child(c)
    c.build_values(1, 11, R_EXP)

    power_kernel=PowerKernel()

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #power_kernel.print_modsel_params()

    param_power_kernel=ModelSelectionParameters('kernel', power_kernel)
    root.append_child(param_power_kernel)

    param_power_kernel_degree=ModelSelectionParameters('degree')
    param_power_kernel_degree.build_values(1, 1, R_EXP)
    param_power_kernel.append_child(param_power_kernel_degree)

    metric1=MinkowskiMetric(10)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #metric1.print_modsel_params()

    param_power_kernel_metric1=ModelSelectionParameters('distance', metric1)

    param_power_kernel.append_child(param_power_kernel_metric1)

    param_power_kernel_metric1_k=ModelSelectionParameters('k')
    param_power_kernel_metric1_k.build_values(1, 12, R_LINEAR)
    param_power_kernel_metric1.append_child(param_power_kernel_metric1_k)

    gaussian_kernel=GaussianKernel()

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #gaussian_kernel.print_modsel_params()

    param_gaussian_kernel=ModelSelectionParameters('kernel', gaussian_kernel)

    root.append_child(param_gaussian_kernel)

    param_gaussian_kernel_width=ModelSelectionParameters('width')
    param_gaussian_kernel_width.build_values(1, 2, R_EXP)
    param_gaussian_kernel.append_child(param_gaussian_kernel_width)

    ds_kernel=DistantSegmentsKernel()

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #ds_kernel.print_modsel_params()

    param_ds_kernel=ModelSelectionParameters('kernel', ds_kernel)

    root.append_child(param_ds_kernel)

    param_ds_kernel_delta=ModelSelectionParameters('delta')
    param_ds_kernel_delta.build_values(1, 2, R_EXP)
    param_ds_kernel.append_child(param_ds_kernel_delta)

    param_ds_kernel_theta=ModelSelectionParameters('theta')
    param_ds_kernel_theta.build_values(1, 2, R_EXP)
    param_ds_kernel.append_child(param_ds_kernel_theta)

    #	root.print_tree()
    combinations=root.get_combinations()
    #	for i in range(combinations.get_num_elements()):
    #		combinations.get_element(i).print_tree()

    return


if __name__=='__main__':
    print('ModelSelection ParameterTree')
    modelselection_parameter_tree_modular(*parameter_list[0])

../examples/documented/python_modular/modelselection_random_search_liblinear_modular.py

#!/usr/bin/env python
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Copyright (C) 2012 Sergey Lisitsyn

from numpy import *
from numpy.random import randn

# generate some overlapping training vectors
num_vectors=100
vec_distance=1
traindat=concatenate((randn(2,num_vectors)-vec_distance,
	randn(2,num_vectors)+vec_distance), axis=1)
label_traindat=concatenate((-ones(num_vectors), ones(num_vectors)));

parameter_list = [[traindat,label_traindat]]

def modelselection_random_search_liblinear_modular (traindat=traindat, label_traindat=label_traindat):
    from shogun.Evaluation import CrossValidation, CrossValidationResult
    from shogun.Evaluation import ContingencyTableEvaluation, ACCURACY
    from shogun.Evaluation import StratifiedCrossValidationSplitting
    from shogun.ModelSelection import RandomSearchModelSelection
    from shogun.ModelSelection import ModelSelectionParameters, R_EXP
    from shogun.ModelSelection import ParameterCombination
    from shogun.Features import BinaryLabels
    from shogun.Features import RealFeatures
    from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC

    # build parameter tree to select C1 and C2 
    param_tree_root=ModelSelectionParameters()
    c1=ModelSelectionParameters("C1");
    param_tree_root.append_child(c1)
    c1.build_values(-2.0, 2.0, R_EXP);

    c2=ModelSelectionParameters("C2");
    param_tree_root.append_child(c2);
    c2.build_values(-2.0, 2.0, R_EXP);

    # training data
    features=RealFeatures(traindat)
    labels=BinaryLabels(label_traindat)

    # classifier
    classifier=LibLinear(L2R_L2LOSS_SVC)

    # print all parameter available for modelselection
    # Dont worry if yours is not included but, write to the mailing list
    #classifier.print_modsel_params()

    # splitting strategy for cross-validation
    splitting_strategy=StratifiedCrossValidationSplitting(labels, 10)

    # evaluation method
    evaluation_criterium=ContingencyTableEvaluation(ACCURACY)

    # cross-validation instance
    cross_validation=CrossValidation(classifier, features, labels,
                                     splitting_strategy, evaluation_criterium)
    cross_validation.set_autolock(False)

    # model selection instance
    model_selection=RandomSearchModelSelection(param_tree_root,
	    cross_validation,0.5) 

    # perform model selection with selected methods
    #print "performing model selection of"
    #param_tree_root.print_tree()
    best_parameters=model_selection.select_model()

    # print best parameters
    #print "best parameters:"
    #best_parameters.print_tree()

    # apply them and print result
    best_parameters.apply_to_machine(classifier)
    result=cross_validation.evaluate()
    #result.print_result()

if __name__=='__main__':
    print('ModelSelectionRandomSearchLibLinear')
    modelselection_random_search_liblinear_modular(*parameter_list[0])

Preprocessor

../examples/documented/python_modular/preprocessor_dimensionreductionpreprocessor_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data, 3], [data, 10]]

def preprocessor_dimensionreductionpreprocessor_modular (data, k):
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import DimensionReductionPreprocessor
	from shogun.Converter import LocallyLinearEmbedding
	
	features = RealFeatures(data)

	converter = LocallyLinearEmbedding()
	converter.set_k(k)

	preprocessor = DimensionReductionPreprocessor(converter)
	preprocessor.init(features)
	preprocessor.apply_to_feature_matrix(features)

	return features


if __name__=='__main__':
	print('DimensionReductionPreprocessor')
	preprocessor_dimensionreductionpreprocessor_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_kernelpca_modular.py

# In this example toy data is being processed using the kernel PCA algorithm
# as described in
# 
# Schölkopf, B., Smola, A. J., & Muller, K. R. (1999).
# Kernel Principal Component Analysis.
# Advances in kernel methods support vector learning, 1327(3), 327-352. MIT Press.
# Retrieved from http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.32.8744i
# 
# A gaussian kernel is used for the processing.

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data, 0.01, 1.0], [data, 0.05, 2.0]]

def preprocessor_kernelpca_modular (data, threshold, width):
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import KernelPCA
	from shogun.Kernel import GaussianKernel
	
	features = RealFeatures(data)
	
	kernel = GaussianKernel(features,features,width)
		
	preprocessor = KernelPCA(kernel)
	preprocessor.init(features)
	preprocessor.apply_to_feature_matrix(features)

	return features


if __name__=='__main__':
	print('KernelPCA')
	preprocessor_kernelpca_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_logplusone_modular.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# LogPlusOne adds one to a dense real-valued vector and takes the logarithm of
# each component of it. It is most useful in situations where the inputs are
# counts: When one compares differences of small counts any difference may matter
# a lot, while small differences in large counts don't. This is what this log
# transformation controls for.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat+10,testdat+10,1.4,10],[traindat+10,testdat+10,1.5,10]]

def preprocessor_logplusone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10):

	from shogun.Kernel import Chi2Kernel
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import LogPlusOne

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	preproc=LogPlusOne()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	
	kernel=Chi2Kernel(feats_train, feats_train, width, size_cache)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('LogPlusOne')
	preprocessor_logplusone_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_normone_modular.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# NormOne, normalizes vectors to have norm 1.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,1.4,10],[traindat,testdat,1.5,10]]

def preprocessor_normone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10):

	from shogun.Kernel import Chi2Kernel
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import NormOne

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	preprocessor=NormOne()
	preprocessor.init(feats_train)
	feats_train.add_preprocessor(preprocessor)
	feats_train.apply_preprocessor()
	feats_test.add_preprocessor(preprocessor)
	feats_test.apply_preprocessor()

	kernel=Chi2Kernel(feats_train, feats_train, width, size_cache)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('NormOne')
	preprocessor_normone_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_pca_modular.py

# In this example toy data is being processed using the
# Principal Component Analysis. 

#!/usr/bin/env python
from tools.load import LoadMatrix

lm=LoadMatrix()
data = lm.load_numbers('../data/fm_train_real.dat')

parameter_list = [[data]]

def preprocessor_pca_modular (data):
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import PCA
	
	features = RealFeatures(data)
		
	preprocessor = PCA()
	preprocessor.init(features)
	preprocessor.apply_to_feature_matrix(features)

	return features


if __name__=='__main__':
	print('PCA')
	preprocessor_pca_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_prunevarsubmean_modular.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# PruneVarSubMean substracts the mean from each feature and removes features that
# have zero variance.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,1.5,10],[traindat,testdat,1.5,10]]

def preprocessor_prunevarsubmean_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10):
	from shogun.Kernel import Chi2Kernel
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import PruneVarSubMean

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	preproc=PruneVarSubMean()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=Chi2Kernel(feats_train, feats_train, width, size_cache)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('PruneVarSubMean')
	preprocessor_prunevarsubmean_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_randomfouriergausspreproc_modular.py

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')

parameter_list = [[traindat,testdat,1.5,10],[traindat,testdat,1.5,10]]

def preprocessor_randomfouriergausspreproc_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10):
	from shogun.Kernel import Chi2Kernel
	from shogun.Features import RealFeatures
	from shogun.Preprocessor import RandomFourierGaussPreproc

	feats_train=RealFeatures(fm_train_real)
	feats_test=RealFeatures(fm_test_real)

	preproc=RandomFourierGaussPreproc()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=Chi2Kernel(feats_train, feats_train, width, size_cache)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('RandomFourierGaussPreproc')
	preprocessor_randomfouriergausspreproc_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_sortulongstring_modular.py

# In this example a kernel matrix is computed for a given string data set. The
# CommUlongString kernel is used to compute the spectrum kernel from strings that
# have been mapped into unsigned 64bit integers. These 64bit integers correspond
# to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
# This is done using the SortUlongString preprocessor, which sorts the indivual
# strings in ascending order. The kernel function basically uses the algorithm in
# the unix "comm" command (hence the name). Note that this representation enables
# spectrum kernels of order 8 for 8bit alphabets (like binaries) and order 32 for
# 2-bit alphabets like DNA. For this kernel the linadd speedups are implemented
# (though there is room for improvement here when a whole set of sequences is
# ADDed) using sorted lists.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindna,testdna,4,0,False,False],[traindna,testdna,3,0,False,False]]

def preprocessor_sortulongstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False):

	from shogun.Kernel import CommUlongStringKernel
	from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA
	from shogun.Preprocessor import SortUlongString


	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_train_dna)
	feats_train=StringUlongFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)

	charfeat=StringCharFeatures(DNA)
	charfeat.set_features(fm_test_dna)
	feats_test=StringUlongFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)

	preproc=SortUlongString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=CommUlongStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()
	return km_train,km_test,kernel

if __name__=='__main__':
	print('CommUlongString')
	preprocessor_sortulongstring_modular(*parameter_list[0])

../examples/documented/python_modular/preprocessor_sortwordstring_modular.py

# In this example a kernel matrix is computed for a given string data set. The
# CommWordString kernel is used to compute the spectrum kernel from strings that
# have been mapped into unsigned 16bit integers. These 16bit integers correspond
# to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
# This is done using the SortWordString preprocessor, which sorts the indivual
# strings in ascending order. The kernel function basically uses the algorithm in
# the unix "comm" command (hence the name). Note that this representation is
# especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it
# enables spectrum kernels of order up to 8. For this kernel the linadd speedups
# are quite efficiently implemented using direct maps.

#!/usr/bin/env python
from tools.load import LoadMatrix
lm=LoadMatrix()

traindna = lm.load_dna('../data/fm_train_dna.dat')
testdna = lm.load_dna('../data/fm_test_dna.dat')

parameter_list = [[traindna,testdna,3,0,False,False],[traindna,testdna,3,0,False,False]]

def preprocessor_sortwordstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False):

	from shogun.Kernel import CommWordStringKernel
	from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
	from shogun.Preprocessor import SortWordString

	charfeat=StringCharFeatures(fm_train_dna, DNA)
	feats_train=StringWordFeatures(charfeat.get_alphabet())
	feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
	preproc=SortWordString()
	preproc.init(feats_train)
	feats_train.add_preprocessor(preproc)
	feats_train.apply_preprocessor()

	charfeat=StringCharFeatures(fm_test_dna, DNA)
	feats_test=StringWordFeatures(charfeat.get_alphabet())
	feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test.add_preprocessor(preproc)
	feats_test.apply_preprocessor()

	kernel=CommWordStringKernel(feats_train, feats_train, use_sign)

	km_train=kernel.get_kernel_matrix()
	kernel.init(feats_train, feats_test)
	km_test=kernel.get_kernel_matrix()

	return km_train,km_test,kernel

if __name__=='__main__':
	print('CommWordString')
	preprocessor_sortwordstring_modular(*parameter_list[0])

Regression

../examples/documented/python_modular/regression_gaussian_process_modular.py

#!/usr/bin/env python
from numpy import concatenate, ones
from numpy.random import randn
num=100
dist=1

traindat=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1)
testdat=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1)
label_traindat = concatenate((-ones(num), ones(num)))

parameter_list=[[traindat, testdat, label_traindat, 2.1]]

def regression_gaussian_process_modular (traindata_real=traindat, \
		testdata_real=testdat, \
		trainlab=label_traindat, width=2.1):
	from numpy.random import randn
	from shogun.Features import RealFeatures, RegressionLabels
	from shogun.Kernel import GaussianKernel
	try:
		from shogun.Regression import GaussianLikelihood, ZeroMean, \
				ExactInferenceMethod, GaussianProcessRegression
	except ImportError:
		print "Eigen3 needed for Gaussian Processes"
		return

	labels=RegressionLabels(trainlab)

	feats_train=RealFeatures(traindata_real)
	feats_test=RealFeatures(testdata_real)
	kernel=GaussianKernel(feats_train, feats_train, width)
	zmean = ZeroMean()
	lik = GaussianLikelihood()
	inf = ExactInferenceMethod(kernel, feats_train, zmean, labels, lik)
	gp = GaussianProcessRegression(inf, feats_train, labels)

	alpha = inf.get_alpha()
	diagonal = inf.get_diagonal_vector()
	cholesky = inf.get_cholesky()
	gp.set_return_type(GaussianProcessRegression.GP_RETURN_COV)

	covariance = gp.apply_regression(feats_test)

	gp.set_return_type(GaussianProcessRegression.GP_RETURN_MEANS)

	predictions = gp.apply_regression()

	print("Alpha Vector")
	print(alpha)

	print("Labels")
	print(labels.get_labels())

	print("sW Matrix")
	print(diagonal)

	print("Covariances")
	print(covariance.get_labels())

	print("Mean Predictions")
	print(predictions.get_labels())

	print("Cholesky Matrix L")
	print(cholesky)
	return gp, alpha, labels, diagonal, covariance, predictions, cholesky

if __name__=='__main__':
	print('Gaussian Process Regression')
	regression_gaussian_process_modular(*parameter_list[0])

../examples/documented/python_modular/regression_kernel_ridge_modular.py

#!/usr/bin/env python
###########################################################################
# kernel ridge regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat,0.8,1e-6],[traindat,testdat,label_traindat,0.9,1e-7]]

def regression_kernel_ridge_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,width=0.8,tau=1e-6):

	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Regression import KernelRidgeRegression

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)

	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=RegressionLabels(label_train)

	krr=KernelRidgeRegression(tau, kernel, labels)
	krr.train(feats_train)

	kernel.init(feats_train, feats_test)
	out = krr.apply().get_labels()
	return out,kernel,krr

# equivialent shorter version
def krr_short ():
	print('KRR_short')
	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Regression import KernelRidgeRegression

	width=0.8; tau=1e-6
	krr=KernelRidgeRegression(tau, GaussianKernel(0, width), RegressionLabels(label_train))
	krr.train(RealFeatures(fm_train))
	out = krr.apply(RealFeatures(fm_test)).get_labels()

	return krr,out

if __name__=='__main__':
	print('KRR')
	regression_kernel_ridge_modular(*parameter_list[0])

../examples/documented/python_modular/regression_least_squares_modular.py

#!/usr/bin/env python
###########################################################################
# kernel ridge regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat]]

def regression_least_squares_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,tau=1e-6):

	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Regression import LeastSquaresRegression

	ls=LeastSquaresRegression(RealFeatures(traindat), RegressionLabels(label_train))
	ls.train()
	out = ls.apply(RealFeatures(fm_test)).get_labels()
	return out,ls

if __name__=='__main__':
	print('LeastSquaresRegression')
	regression_least_squares_modular(*parameter_list[0])

../examples/documented/python_modular/regression_libsvr_modular.py

# In this example a support vector regression algorithm is trained on a
# real-valued toy data set. The underlying library used for the SVR training is
# LIBSVM. The SVR is trained with regularization parameter C=1 and a gaussian
# kernel with width=2.1. The labels of both the train and the test data are
# fetched via svr.classify().get_labels().
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ .

#!/usr/bin/env python
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \
                 [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]]


def regression_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\
				       width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2):

	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	from shogun.Regression import LibSVR

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)

	kernel=GaussianKernel(feats_train, feats_train, width)
	labels=RegressionLabels(label_train)

	svr=LibSVR(C, tube_epsilon, kernel, labels)
	svr.set_epsilon(epsilon)
	svr.train()

	kernel.init(feats_train, feats_test)
	out1=svr.apply().get_labels()
	out2=svr.apply(feats_test).get_labels()

	return out1,out2,kernel

if __name__=='__main__':
	print('LibSVR')
	regression_libsvr_modular(*parameter_list[0])

../examples/documented/python_modular/regression_linear_ridge_modular.py

#!/usr/bin/env python
###########################################################################
# linear ridge regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')


parameter_list = [[traindat,testdat,label_traindat,1e-6],[traindat,testdat,label_traindat,100]]

def regression_linear_ridge_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,tau=1e-6):

	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Regression import LinearRidgeRegression

	rr=LinearRidgeRegression(tau, RealFeatures(traindat), RegressionLabels(label_train))
	rr.train()
	out = rr.apply(RealFeatures(fm_test)).get_labels()
	return out,rr

if __name__=='__main__':
	print('LinearRidgeRegression')
	regression_linear_ridge_modular(*parameter_list[0])

../examples/documented/python_modular/regression_svrlight_modular.py

# In this example a support vector regression algorithm is trained on a
# real-valued toy data set. The underlying library used for the SVR training is
# SVM^light. The SVR is trained with regularization parameter C=1 and a gaussian
# kernel with width=2.1. The the label of both the train and the test data are
# fetched via svr.classify().get_labels().
# 
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.

#!/usr/bin/env python
###########################################################################
# svm light based support vector regression
###########################################################################
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat,1.2,1,1e-5,1e-2,1],[traindat,testdat,label_traindat,2.3,0.5,1e-5,1e-6,1]]

def regression_svrlight_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat, \
				    width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3):


	from shogun.Features import RegressionLabels, RealFeatures
	from shogun.Kernel import GaussianKernel
	try:
		from shogun.Regression import SVRLight
	except ImportError:
		print('No support for SVRLight available.')
		return

	feats_train=RealFeatures(fm_train)
	feats_test=RealFeatures(fm_test)

	kernel=GaussianKernel(feats_train, feats_train, width)

	labels=RegressionLabels(label_train)

	svr=SVRLight(C, epsilon, kernel, labels)
	svr.set_tube_epsilon(tube_epsilon)
	svr.parallel.set_num_threads(num_threads)
	svr.train()

	kernel.init(feats_train, feats_test)
	out = svr.apply().get_labels()
	
	return out, kernel 

if __name__=='__main__':
	print('SVRLight')
	regression_svrlight_modular(*parameter_list[0])

Serialization

../examples/documented/python_modular/serialization_complex_example.py

# In this example serialization of SVM (Support Vector Machine) is shown

#!/usr/bin/env python
parameter_list=[[5,1,10, 2.0, 10], [10,0.3,2, 1.0, 0.1]]

def check_status(status,suffix):
	# silent...
	assert(status)
	if  status:
		print("OK reading/writing %s\n" % suffix)
	else:
		print("ERROR reading/writing %s\n" % suffix)

def serialization_complex_example (num=5, dist=1, dim=10, C=2.0, width=10):
	import os
	from numpy import concatenate, zeros, ones
	from numpy.random import randn, seed
	from shogun.Features import RealFeatures, MulticlassLabels
	from shogun.Classifier import GMNPSVM
	from shogun.Kernel import GaussianKernel
	from shogun.IO import SerializableHdf5File,SerializableAsciiFile, \
			SerializableJsonFile,SerializableXmlFile,MSG_DEBUG
	from shogun.Preprocessor import NormOne, LogPlusOne

	seed(17)

	data=concatenate((randn(dim, num), randn(dim, num) + dist,
					  randn(dim, num) + 2*dist,
					  randn(dim, num) + 3*dist), axis=1)
	lab=concatenate((zeros(num), ones(num), 2*ones(num), 3*ones(num)))

	feats=RealFeatures(data)
	#feats.io.set_loglevel(MSG_DEBUG)
	kernel=GaussianKernel(feats, feats, width)

	labels=MulticlassLabels(lab)

	svm = GMNPSVM(C, kernel, labels)

	feats.add_preprocessor(NormOne())
	feats.add_preprocessor(LogPlusOne())
	feats.set_preprocessed(1)
	svm.train(feats)

	#svm.print_serializable()

	fstream = SerializableHdf5File("blaah.h5", "w")
	status = svm.save_serializable(fstream)
	check_status(status,'h5')

	fstream = SerializableAsciiFile("blaah.asc", "w")
	status = svm.save_serializable(fstream)
	check_status(status,'asc')

	fstream = SerializableJsonFile("blaah.json", "w")
	status = svm.save_serializable(fstream)
	check_status(status,'json')

	fstream = SerializableXmlFile("blaah.xml", "w")
	status = svm.save_serializable(fstream)
	check_status(status,'xml')


	fstream = SerializableHdf5File("blaah.h5", "r")
	new_svm=GMNPSVM()
	status = new_svm.load_serializable(fstream)
	check_status(status,'h5')
	new_svm.train()

	fstream = SerializableAsciiFile("blaah.asc", "r")
	new_svm=GMNPSVM()
	status = new_svm.load_serializable(fstream)
	check_status(status,'asc')
	new_svm.train()

	fstream = SerializableJsonFile("blaah.json", "r")
	new_svm=GMNPSVM()
	status = new_svm.load_serializable(fstream)
	check_status(status,'json')
	new_svm.train()

	fstream = SerializableXmlFile("blaah.xml", "r")
	new_svm=GMNPSVM()
	status = new_svm.load_serializable(fstream)
	check_status(status,'xml')
	new_svm.train()

	os.unlink("blaah.h5")
	os.unlink("blaah.asc")
	os.unlink("blaah.json")
	os.unlink("blaah.xml")
	return svm,new_svm


if __name__=='__main__':
	print('Serialization SVMLight')
	serialization_complex_example(*parameter_list[0])

../examples/documented/python_modular/serialization_matrix_modular.py

# In this example dense toy features is being serialized 

#!/usr/bin/env python
from modshogun import *
from numpy import array
import os

parameter_list=[[[[1.0,2,3],[4,5,6]]]]

def serialization_matrix_modular (m):
	feats=RealFeatures(array(m))
	#feats.io.set_loglevel(0)
	fstream = SerializableAsciiFile("foo.asc", "w")
	feats.save_serializable(fstream)

	l=MulticlassLabels(array([1.0,2,3]))
	fstream = SerializableAsciiFile("foo2.asc", "w")
	l.save_serializable(fstream)

	os.unlink("foo.asc")
	os.unlink("foo2.asc")

if __name__=='__main__':
	print('Serialization Matrix Modular')
	serialization_matrix_modular(*parameter_list[0])

../examples/documented/python_modular/serialization_svmlight_modular.py

# This example shows how to use boost serialization (only available if the compile flag was enabled)
# to serialize/deserialize an SVMLight object. Note that this code is in alpha state.

#!/usr/bin/env python
parameter_list=[[10, 1, 2.1, 2.0]]

def serialization_svmlight_modular (num, dist, width, C):
	from shogun.IO import MSG_DEBUG
	from shogun.Features import RealFeatures, BinaryLabels, DNA, Alphabet
	from shogun.Kernel import WeightedDegreeStringKernel, GaussianKernel
	from shogun.Classifier import SVMLight
	from numpy import concatenate, ones
	from numpy.random import randn, seed

	import sys
	import types
	import random
	import bz2
	try:
		import cPickle as pickle
	except ImportError:
		import pickle as pickle	
	import inspect


	def save(filename, myobj):
		"""
		save object to file using pickle
		
		@param filename: name of destination file
		@type filename: str
		@param myobj: object to save (has to be pickleable)
		@type myobj: obj
		"""

		try:
			f = bz2.BZ2File(filename, 'wb')
		except IOError as details:
			sys.stderr.write('File ' + filename + ' cannot be written\n')
			sys.stderr.write(details)
			return

		pickle.dump(myobj, f, protocol=2)
		f.close()



	def load(filename):
		"""
		Load from filename using pickle
		
		@param filename: name of file to load from
		@type filename: str
		"""
		
		try:
			f = bz2.BZ2File(filename, 'rb')
		except IOError as details:
			sys.stderr.write('File ' + filename + ' cannot be read\n')
			sys.stderr.write(details)
			return

		myobj = pickle.load(f)
		f.close()
		return myobj


	##################################################

	seed(17)
	traindata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1)
	testdata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1);

	trainlab=concatenate((-ones(num), ones(num)));
	testlab=concatenate((-ones(num), ones(num)));

	feats_train=RealFeatures(traindata_real);
	feats_test=RealFeatures(testdata_real);
	kernel=GaussianKernel(feats_train, feats_train, width);
	#kernel.io.set_loglevel(MSG_DEBUG)

	labels=BinaryLabels(trainlab);

	svm=SVMLight(C, kernel, labels)
	svm.train()
	#svm.io.set_loglevel(MSG_DEBUG)

	##################################################

	#print("labels:")
	#print(pickle.dumps(labels))
	#
	#print("features")
	#print(pickle.dumps(feats_train))
	#
	#print("kernel")
	#print(pickle.dumps(kernel))
	#
	#print("svm")
	#print(pickle.dumps(svm))
	#
	#print("#################################")

	fn = "serialized_svm.bz2"
	#print("serializing SVM to file", fn)

	save(fn, svm)

	#print("#################################")

	#print("unserializing SVM")
	svm2 = load(fn)


	#print("#################################")
	#print("comparing training")

	svm2.train()

	#print("objective before serialization:", svm.get_objective())
	#print("objective after serialization:", svm2.get_objective())
	return svm, svm.get_objective(), svm2, svm2.get_objective()

if __name__=='__main__':
	print('Serialization SVMLight')
	serialization_svmlight_modular(*parameter_list[0])

So

../examples/documented/python_modular/so_multiclass.py

#!/usr/bin/env python
#!/usr/bin/env python

import numpy as np

try:
	from shogun.Features 	import RealFeatures
	from shogun.Loss     	import HingeLoss
	from shogun.Structure	import MulticlassModel, MulticlassSOLabels, PrimalMosekSOSVM, RealNumber
except ImportError:
	print "Mosek not available"
	import sys
	sys.exit(0)

def gen_data(num_classes,num_samples,dim):
	np.random.seed(0)
	covs = np.array([[[0., -1. ], [2.5,  .7]],
			 [[3., -1.5], [1.2, .3]],
			 [[ 2,  0  ], [ .0,  1.5 ]]])
	X = np.r_[np.dot(np.random.randn(num_samples, dim), covs[0]) + np.array([0, 10]),
		  np.dot(np.random.randn(num_samples, dim), covs[1]) + np.array([-10, -10]),
		  np.dot(np.random.randn(num_samples, dim), covs[2]) + np.array([10, -10])];
	Y = np.hstack((np.zeros(num_samples), np.ones(num_samples), 2*np.ones(num_samples)))
	return X, Y

# Number of classes
M = 3
# Number of samples of each class
N = 50
# Dimension of the data
dim = 2

traindat, label_traindat = gen_data(M,N,dim)

parameter_list = [[X,y]]

def so_multiclass (fm_train_real=traindat,label_train_multiclass=label_traindat):
	labels = MulticlassSOLabels(label_train_multiclass)
	features = RealFeatures(fm_train_real.T)

	model = MulticlassModel(features, labels)
	loss = HingeLoss()
	sosvm = PrimalMosekSOSVM(model, loss, labels)
	sosvm.train()

	out = sosvm.apply()
	count = 0
	for i in xrange(out.get_num_labels()):
		yi_pred = RealNumber.obtain_from_generic(out.get_label(i))
		if yi_pred.value == label_train_multiclass[i]:
			count = count + 1

	print "Correct classification rate: %0.2f" % ( 100.0*count/out.get_num_labels() )

if __name__=='__main__':
	print('KNN')
	so_milticlass(*parameter_list[0])

Statistics

../examples/documented/python_modular/statistics_hsic.py

#!/usr/bin/env python
#
# This program is free software you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation either version 3 of the License, or
# (at your option) any later version.
#
# Written (C) 2012 Heiko Strathmann
#
from numpy import *
#from pylab import *
from math import pi

def statistics_hsic ():
	from shogun.Features import RealFeatures
	from shogun.Features import DataGenerator
	from shogun.Kernel import GaussianKernel
	from shogun.Statistics import HSIC
	from shogun.Statistics import BOOTSTRAP, HSIC_GAMMA
	from shogun.Distance import EuclideanDistance
	from shogun.Mathematics import Statistics, Math

	# note that the HSIC has to store kernel matrices
	# which upper bounds the sample size
	n=250
	difference=3
	angle=pi/3

	# use data generator class to produce example data
	data=DataGenerator.generate_sym_mix_gauss(n,difference,angle)
	#plot(data[0], data[1], 'x');show()

	# create shogun feature representation
	features_x=RealFeatures(array([data[0]]))
	features_y=RealFeatures(array([data[1]]))

	# compute median data distance in order to use for Gaussian kernel width
	# 0.5*median_distance normally (factor two in Gaussian kernel)
	# However, shoguns kernel width is different to usual parametrization
	# Therefore 0.5*2*median_distance^2
	# Use a subset of data for that, only 200 elements. Median is stable
	subset=Math.randperm_vec(features_x.get_num_vectors())
	subset=subset[0:200]
	features_x.add_subset(subset)
	dist=EuclideanDistance(features_x, features_x)
	distances=dist.get_distance_matrix()
	features_x.remove_subset()
	median_distance=Statistics.matrix_median(distances, True)
	sigma_x=median_distance**2
	features_y.add_subset(subset)
	dist=EuclideanDistance(features_y, features_y)
	distances=dist.get_distance_matrix()
	features_y.remove_subset()
	median_distance=Statistics.matrix_median(distances, True)
	sigma_y=median_distance**2
	print "median distance for Gaussian kernel on x:", sigma_x
	print "median distance for Gaussian kernel on y:", sigma_y
	kernel_x=GaussianKernel(10,sigma_x)
	kernel_y=GaussianKernel(10,sigma_y)

	hsic=HSIC(kernel_x,kernel_y,features_x,features_y)

	# perform test: compute p-value and test if null-hypothesis is rejected for
	# a test level of 0.05 using different methods to approximate
	# null-distribution
	statistic=hsic.compute_statistic()
	print "HSIC:", statistic
	alpha=0.05

	print "computing p-value using bootstrapping"
	hsic.set_null_approximation_method(BOOTSTRAP)
	# normally, at least 250 iterations should be done, but that takes long
	hsic.set_bootstrap_iterations(100)
	# bootstrapping allows usage of unbiased or biased statistic
	p_value=hsic.compute_p_value(statistic)
	thresh=hsic.compute_threshold(alpha)
	print "p_value:", p_value
	print "threshold for 0.05 alpha:", thresh
	print "p_value <", alpha, ", i.e. test sais p and q are dependend:", p_value<alpha

	print "computing p-value using gamma method"
	hsic.set_null_approximation_method(HSIC_GAMMA)
	p_value=hsic.compute_p_value(statistic)
	thresh=hsic.compute_threshold(alpha)
	print "p_value:", p_value
	print "threshold for 0.05 alpha:", thresh
	print "p_value <", alpha, ", i.e. test sais p and q are dependend::", p_value<alpha

	# sample from null distribution (these may be plotted or whatsoever)
	# mean should be close to zero, variance stronly depends on data/kernel
	# bootstrapping, biased statistic
	print "sampling null distribution using bootstrapping"
	hsic.set_null_approximation_method(BOOTSTRAP)
	hsic.set_bootstrap_iterations(100)
	null_samples=hsic.bootstrap_null()
	print "null mean:", mean(null_samples)
	print "null variance:", var(null_samples)
	#hist(null_samples, 100); show()

if __name__=='__main__':
	print('HSIC')
	statistics_hsic()

../examples/documented/python_modular/statistics_kmm.py

#!/usr/bin/env python
from numpy import *
from numpy import random

def statistics_kmm ():
	from shogun.Features import RealFeatures
	from shogun.Features import DataGenerator
	from shogun.Kernel import GaussianKernel, MSG_DEBUG
	from shogun.Statistics import KernelMeanMatching

	data = random.randn(3,10)

	# create shogun feature representation
	features=RealFeatures(data)

	# use a kernel width of sigma=2, which is 8 in SHOGUN's parametrization
	# which is k(x,y)=exp(-||x-y||^2 / tau), in constrast to the standard
	# k(x,y)=exp(-||x-y||^2 / (2*sigma^2)), so tau=2*sigma^2
	kernel=GaussianKernel(10,8)
	kernel.init(features,features)

	kmm = KernelMeanMatching(kernel,array([0,1,2,3,7,8,9],dtype=int32),array([4,5,6],dtype=int32))
	w = kmm.compute_weights()
	print w

if __name__=='__main__':
	print('KernelMeanMatching')
	statistics_kmm()

../examples/documented/python_modular/statistics_linear_time_mmd.py

#!/usr/bin/env python
#
# This program is free software you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation either version 3 of the License, or
# (at your option) any later version.
#
# Written (C) 2012 Heiko Strathmann
#
from numpy import *

def statistics_linear_time_mmd ():
	from shogun.Features import RealFeatures
	from shogun.Features import DataGenerator
	from shogun.Kernel import GaussianKernel
	from shogun.Statistics import LinearTimeMMD
	from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN
	from shogun.Distance import EuclideanDistance
	from shogun.Mathematics import Statistics, Math

	# note that the linear time statistic is designed for much larger datasets
	n=10000
	dim=2
	difference=0.5

	# use data generator class to produce example data
	# in pratice, this generate data function could be replaced by a method
	# that obtains data from a stream
	data=DataGenerator.generate_mean_data(n,dim,difference)
	
	print "dimension means of X", mean(data.T[0:n].T)
	print "dimension means of Y", mean(data.T[n:2*n+1].T)

	# create shogun feature representation
	features=RealFeatures(data)

	# compute median data distance in order to use for Gaussian kernel width
	# 0.5*median_distance normally (factor two in Gaussian kernel)
	# However, shoguns kernel width is different to usual parametrization
	# Therefore 0.5*2*median_distance^2
	# Use a subset of data for that, only 200 elements. Median is stable
	# Using all distances here would blow up memory
	subset=Math.randperm_vec(features.get_num_vectors())
	subset=subset[0:200]
	features.add_subset(subset)
	dist=EuclideanDistance(features, features)
	distances=dist.get_distance_matrix()
	features.remove_subset()
	median_distance=Statistics.matrix_median(distances, True)
	sigma=median_distance**2
	print "median distance for Gaussian kernel:", sigma
	kernel=GaussianKernel(10,sigma)

	mmd=LinearTimeMMD(kernel,features, n)

	# perform test: compute p-value and test if null-hypothesis is rejected for
	# a test level of 0.05
	statistic=mmd.compute_statistic()
	print "test statistic:", statistic
	
	# do the same thing using two different way to approximate null-dstribution
	# bootstrapping and gaussian approximation (ony for really large samples)
	alpha=0.05

	print "computing p-value using bootstrapping"
	mmd.set_null_approximation_method(BOOTSTRAP)
	mmd.set_bootstrap_iterations(50) # normally, far more iterations are needed
	p_value=mmd.compute_p_value(statistic)
	print "p_value:", p_value
	print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
	
	print "computing p-value using gaussian approximation"
	mmd.set_null_approximation_method(MMD1_GAUSSIAN)
	p_value=mmd.compute_p_value(statistic)
	print "p_value:", p_value
	print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
	
	# sample from null distribution (these may be plotted or whatsoever)
	# mean should be close to zero, variance stronly depends on data/kernel
	mmd.set_null_approximation_method(BOOTSTRAP)
	mmd.set_bootstrap_iterations(10) # normally, far more iterations are needed
	null_samples=mmd.bootstrap_null()
	print "null mean:", mean(null_samples)
	print "null variance:", var(null_samples)
	
if __name__=='__main__':
	print('LinearTimeMMD')
	statistics_linear_time_mmd()

../examples/documented/python_modular/statistics_linear_time_mmd_kernel_choice.py

#!/usr/bin/env python
#
# This program is free software you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation either version 3 of the License, or
# (at your option) any later version.
#
# Written (C) 2012 Heiko Strathmann
#
from numpy import *
#from matplotlib import pyplot

# performs learning of optimal non-negative kernel weights for a linear time
# two sample test using the linear time Maximum Mean Discrepancy
def statistics_linear_time_mmd_kernel_choice ():
	from shogun.Features import RealFeatures, CombinedFeatures
	from shogun.Features import DataGenerator
	from shogun.Kernel import GaussianKernel, CombinedKernel
	from shogun.Statistics import LinearTimeMMD
	from shogun.Statistics import BOOTSTRAP, MMD1_GAUSSIAN

	# note that the linear time statistic is designed for much larger datasets
	n=50000
	dim=5
	difference=2

	# use data generator class to produce example data
	# in pratice, this generate data function could be replaced by a method
	# that obtains data from a stream
	data=DataGenerator.generate_mean_data(n,dim,difference)
	
	print "dimension means of X", mean(data.T[0:n].T)
	print "dimension means of Y", mean(data.T[n:2*n+1].T)

	# create kernels/features to choose from
	# here: just a bunch of Gaussian Kernels with different widths
	# real sigmas are 2^-5, ..., 2^10
	sigmas=array([pow(2,x) for x in range(-5,10)])
	
	# shogun has a different parametrization of the Gaussian kernel
	shogun_sigmas=array([x*x*2 for x in sigmas])
	
	# We will use multiple kernels
	kernel=CombinedKernel()
	
	# two separate feature objects here, could also be one with appended data
	features=CombinedFeatures()
	
	# all kernels work on same features
	for i in range(len(sigmas)):
		kernel.append_kernel(GaussianKernel(10, shogun_sigmas[i]))
		features.append_feature_obj(RealFeatures(data))
	
	mmd=LinearTimeMMD(kernel,features, n)
	
	print "start learning kernel weights"
	mmd.set_opt_regularization_eps(10E-5)
	mmd.set_opt_low_cut(10E-5)
	mmd.set_opt_max_iterations(1000)
	mmd.set_opt_epsilon(10E-7)
	mmd.optimize_kernel_weights()
	weights=kernel.get_subkernel_weights()
	print "learned weights:", weights
	#pyplot.plot(array(range(len(sigmas))), weights)
	#pyplot.show()
	print "index of max weight", weights.argmax()

if __name__=='__main__':
	print('LinearTimeMMDKernelChoice')
	statistics_linear_time_mmd_kernel_choice()

../examples/documented/python_modular/statistics_quadratic_time_mmd.py

#!/usr/bin/env python
#
# This program is free software you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation either version 3 of the License, or
# (at your option) any later version.
#
# Written (C) 2012 Heiko Strathmann
#
from numpy import *

def statistics_quadratic_time_mmd ():
	from shogun.Features import RealFeatures
	from shogun.Features import DataGenerator
	from shogun.Kernel import GaussianKernel
	from shogun.Statistics import QuadraticTimeMMD
	from shogun.Statistics import BOOTSTRAP, MMD2_SPECTRUM, MMD2_GAMMA, BIASED, UNBIASED
	from shogun.Distance import EuclideanDistance
	from shogun.Mathematics import Statistics, Math

	# note that the quadratic time mmd has to store kernel matrices
	# which upper bounds the sample size
	n=500
	dim=2
	difference=0.5

	# use data generator class to produce example data
	data=DataGenerator.generate_mean_data(n,dim,difference)
	
	print "dimension means of X", mean(data.T[0:n].T)
	print "dimension means of Y", mean(data.T[n:2*n+1].T)

	# create shogun feature representation
	features=RealFeatures(data)

	# compute median data distance in order to use for Gaussian kernel width
	# 0.5*median_distance normally (factor two in Gaussian kernel)
	# However, shoguns kernel width is different to usual parametrization
	# Therefore 0.5*2*median_distance^2
	# Use a subset of data for that, only 200 elements. Median is stable
	subset=Math.randperm_vec(features.get_num_vectors())
	subset=subset[0:200]
	features.add_subset(subset)
	dist=EuclideanDistance(features, features)
	distances=dist.get_distance_matrix()
	features.remove_subset()
	median_distance=Statistics.matrix_median(distances, True)
	sigma=median_distance**2
	print "median distance for Gaussian kernel:", sigma
	kernel=GaussianKernel(10,sigma)

	mmd=QuadraticTimeMMD(kernel,features, n)

	# perform test: compute p-value and test if null-hypothesis is rejected for
	# a test level of 0.05 using different methods to approximate
	# null-distribution
	statistic=mmd.compute_statistic()
	alpha=0.05
	
	print "computing p-value using bootstrapping"
	mmd.set_null_approximation_method(BOOTSTRAP)
	# normally, at least 250 iterations should be done, but that takes long
	mmd.set_bootstrap_iterations(10)
	# bootstrapping allows usage of unbiased or biased statistic
	mmd.set_statistic_type(UNBIASED)
	p_value=mmd.compute_p_value(statistic)
	print "p_value:", p_value
	print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
	
	# only can do this if SHOGUN was compiled with LAPACK so check
	if "sample_null_spectrum" in dir(QuadraticTimeMMD):
		print "computing p-value using spectrum method"
		mmd.set_null_approximation_method(MMD2_SPECTRUM)
		# normally, at least 250 iterations should be done, but that takes long
		mmd.set_num_samples_sepctrum(50)
		mmd.set_num_eigenvalues_spectrum(n-10)
		# spectrum method computes p-value for biased statistics only
		mmd.set_statistic_type(BIASED)
		p_value=mmd.compute_p_value(statistic)
		print "p_value:", p_value
		print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
	
	print "computing p-value using gamma method"
	mmd.set_null_approximation_method(MMD2_GAMMA)
	# gamma method computes p-value for biased statistics only
	mmd.set_statistic_type(BIASED)
	p_value=mmd.compute_p_value(statistic)
	print "p_value:", p_value
	print "p_value <", alpha, ", i.e. test sais p!=q:", p_value<alpha
	
	# sample from null distribution (these may be plotted or whatsoever)
	# mean should be close to zero, variance stronly depends on data/kernel
	# bootstrapping, biased statistic
	print "sampling null distribution using bootstrapping"
	mmd.set_null_approximation_method(BOOTSTRAP)
	mmd.set_statistic_type(BIASED)
	mmd.set_bootstrap_iterations(10)
	null_samples=mmd.bootstrap_null()
	print "null mean:", mean(null_samples)
	print "null variance:", var(null_samples)
	
	# sample from null distribution (these may be plotted or whatsoever)
	# mean should be close to zero, variance stronly depends on data/kernel
	# spectrum, biased statistic
	print "sampling null distribution using spectrum method"
	mmd.set_null_approximation_method(MMD2_SPECTRUM)
	mmd.set_statistic_type(BIASED)
	# 200 samples using 100 eigenvalues
	null_samples=mmd.sample_null_spectrum(50,10)
	print "null mean:", mean(null_samples)
	print "null variance:", var(null_samples)
	
if __name__=='__main__':
	print('QuadraticTimeMMD')
	statistics_quadratic_time_mmd()

Streaming

../examples/documented/python_modular/streaming_vw_createcache_modular.py

#!/usr/bin/env python
from modshogun import StreamingVwFile
from modshogun import StreamingVwCacheFile
from modshogun import T_SVMLIGHT
from modshogun import StreamingVwFeatures
from modshogun import VowpalWabbit

parameter_list=[['../data/fm_train_sparsereal.dat']]

def streaming_vw_createcache_modular (fname):
	# First creates a binary cache from an ascii data file.
	# and then trains using the StreamingVwCacheFile as input

	# Open the input file as a StreamingVwFile
	input_file = StreamingVwFile(fname)
	# Default file name will be vw_cache.dat.cache
	input_file.set_write_to_cache(True)

	# Tell VW that the file is in SVMLight format
	# Supported types are T_DENSE, T_SVMLIGHT and T_VW
	input_file.set_parser_type(T_SVMLIGHT)

	# Create a StreamingVwFeatures object, `True' indicating the examples are labelled
	features = StreamingVwFeatures(input_file, True, 1024)

	# Create a VW object from the features
	vw = VowpalWabbit(features)
	vw.set_no_training(True)

	# Train (in this case does nothing but run over all examples)
	vw.train()

	#Finally Train using the generated cache file

	# Open the input cache file as a StreamingVwCacheFile
	input_file = StreamingVwCacheFile("vw_cache.dat.cache");

	# The rest is exactly as for normal input
	features = StreamingVwFeatures(input_file, True, 1024);
	vw = VowpalWabbit(features)
	vw.train()
	#return vw

if __name__ == "__main__":
	streaming_vw_createcache_modular(*parameter_list[0])

../examples/documented/python_modular/streaming_vw_modular.py

#!/usr/bin/env python
from modshogun import StreamingVwFile
from modshogun import T_SVMLIGHT
from modshogun import StreamingVwFeatures
from modshogun import VowpalWabbit

parameter_list=[[None]]

def streaming_vw_modular (dummy):
	"""Runs the VW algorithm on a toy dataset in SVMLight format."""

	# Open the input file as a StreamingVwFile
	input_file = StreamingVwFile("../data/fm_train_sparsereal.dat")

	# Tell VW that the file is in SVMLight format
	# Supported types are T_DENSE, T_SVMLIGHT and T_VW
	input_file.set_parser_type(T_SVMLIGHT)

	# Create a StreamingVwFeatures object, `True' indicating the examples are labelled
	features = StreamingVwFeatures(input_file, True, 1024)

	# Create a VW object from the features
	vw = VowpalWabbit(features)

	# Train
	vw.train()

	#return vw

if __name__ == "__main__":
	streaming_vw_modular(*parameter_list[0])

Structure

../examples/documented/python_modular/structure_dynprog_modular.py

# In this example we use the dynamic progaramm implementation with a 
# gene finding specific model. The model and the training parameter 
# are stored in a file and are used to create a gene prediction on 
# some example sequence. 

#!/usr/bin/env python
#!/usr/bin/env python 
# -*- coding: utf-8 -*-

parameter_list=[['../data/DynProg_example_py.pickle.gz']]

from shogun.Structure import *

import numpy
from numpy import array,Inf,float64,matrix,frompyfunc,zeros

#from IPython.Shell import IPShellEmbed
#ipshell = IPShellEmbed()

import gzip
import scipy
from scipy.io import loadmat

import pickle

try:
	from StringIO import StringIO
except ImportError:
	from io import StringIO

def get_ver(ver_str):
	scipy_ver=[int(i) for i in scipy.__version__.split('.')]
	v=0
	for i in xrange(len(scipy_ver)):
		v+=10**(len(scipy_ver)-i)*scipy_ver[i]
	return v

if get_ver(scipy.__version__) >= get_ver('0.7.0'):
	renametable = {
			'scipy.io.mio5': 'scipy.io.matlab.mio5',
			'scipy.sparse.sparse' : 'scipy.sparse',
			}
else:
	renametable = {}

def mapname(name):
	if name in renametable:
		return renametable[name]
	return name

# scipy compatibility class
class mat_struct(object):
    pass

def mapped_load_global(self):
	module = mapname(self.readline()[:-1])
	name = mapname(self.readline()[:-1])

	if name=='mat_struct':
		klass=mat_struct
	else:
		klass = self.find_class(module, name)

	self.append(klass)

def loads(str):
	file = StringIO(str)
	unpickler = pickle.Unpickler(file)
	unpickler.dispatch[pickle.GLOBAL] = mapped_load_global
	return unpickler.load()

def structure_dynprog_modular (fname):

	data_dict = loads(gzip.GzipFile(fname).read())
	#data_dict = loadmat('../data/DynProg_example_py.dat.mat', appendmat=False, struct_as_record=False)

	#print(data_dict)
	#print(len(data_dict['penalty_array'][0][0][0][0].limits[0]))
	num_plifs,num_limits = len(data_dict['penalty_array']),len(data_dict['penalty_array'][0].limits)
	pm = PlifMatrix()
	pm.create_plifs(num_plifs,num_limits)

	ids = numpy.array(list(range(num_plifs)),dtype=numpy.int32)
	min_values = numpy.array(list(range(num_plifs)),dtype=numpy.float64)
	max_values = numpy.array(list(range(num_plifs)),dtype=numpy.float64)
	all_use_cache = numpy.array(list(range(num_plifs)),dtype=numpy.bool)
	all_use_svm = numpy.array(list(range(num_plifs)),dtype=numpy.int32)
	all_limits = zeros((num_plifs,num_limits))
	all_penalties = zeros((num_plifs,num_limits))
	all_names = ['']*num_plifs
	all_transforms = ['']*num_plifs
	for plif_idx in range(num_plifs):
		ids[plif_idx]          = data_dict['penalty_array'][plif_idx].id-1
		min_values[plif_idx]   = data_dict['penalty_array'][plif_idx].min_value
		max_values[plif_idx]   = data_dict['penalty_array'][plif_idx].max_value
		all_use_cache[plif_idx]   = data_dict['penalty_array'][plif_idx].use_cache
		all_use_svm[plif_idx]   = data_dict['penalty_array'][plif_idx].use_svm
		all_limits[plif_idx]   = data_dict['penalty_array'][plif_idx].limits
		all_penalties[plif_idx]   = data_dict['penalty_array'][plif_idx].penalties
		all_names[plif_idx]   = str(data_dict['penalty_array'][plif_idx].name)
		all_transforms[plif_idx]   = str(data_dict['penalty_array'][plif_idx].transform)
		if all_transforms[plif_idx] == '[]':
			all_transforms[plif_idx] = 'linear'

	pm.set_plif_ids(ids)
	pm.set_plif_min_values(min_values)
	pm.set_plif_max_values(max_values)
	pm.set_plif_use_cache(all_use_cache)
	pm.set_plif_use_svm(all_use_svm)
	pm.set_plif_limits(all_limits)
	pm.set_plif_penalties(all_penalties)
	#pm.set_plif_names(all_names)
	#pm.set_plif_transform_type(all_transforms)

	transition_ptrs = data_dict['model'].transition_pointers
	transition_ptrs = transition_ptrs[:,:,0:2]
	transition_ptrs = transition_ptrs.astype(numpy.float64)

	pm.compute_plif_matrix(transition_ptrs)

	# init_dyn_prog
	num_svms = 8
	dyn = DynProg(num_svms)
	orf_info = data_dict['model'].orf_info
	orf_info = orf_info.astype(numpy.int32)
	num_states = orf_info.shape[0]
	dyn.set_num_states(num_states)

	block = data_dict['block']
	seq_len = len(block.seq)
	seq = str(block.seq)
	gene_string = array([elem for elem in seq])

	# precompute_content_svms
	pos = block.all_pos-1
	pos = pos.astype(numpy.int32)
	snd_pos = pos
	dyn.set_pos(pos)
	dyn.set_gene_string(gene_string)
	dyn.create_word_string()
	dyn.precompute_stop_codons()
	dyn.init_content_svm_value_array(num_svms)
	dict_weights = data_dict['content_weights']
	dict_weights = dict_weights.reshape(8,1).astype(numpy.float64)
	dict_weights = zeros((8,5440))
	dyn.set_dict_weights(dict_weights.T)

	dyn.precompute_content_values()

	dyn.init_mod_words_array(data_dict['model'].mod_words.astype(numpy.int32))
	pm.compute_signal_plifs(data_dict['state_signals'].astype(numpy.int32))

	dyn.set_orf_info(orf_info)

	#
	p = data_dict['model'].p
	q = data_dict['model'].q
	dyn.set_p_vector(p)
	dyn.set_q_vector(q)
	a_trans = data_dict['a_trans']
	a_trans = a_trans.astype(float64)

	dyn.set_a_trans_matrix(a_trans)


	dyn.check_svm_arrays()
	features = data_dict['block'].features

	dyn.set_observation_matrix(features)

	dyn.set_content_type_array(data_dict['seg_path'].astype(numpy.float64))
	dyn.best_path_set_segment_loss(data_dict['loss'].astype(numpy.float64))

	use_orf = True
	feat_dims = [25,201,2]

	dyn.set_plif_matrices(pm);

	#dyn.compute_nbest_paths(features.shape[2], use_orf, 1,True,False)

	## fetch results
	#states = dyn.get_states()
	##print(states)
	#scores = dyn.get_scores()
	##print(scores)
	#positions = dyn.get_positions()
	##print(positions)

	#return states, scores, positions

if __name__ == '__main__':
	print("Structure")
	structure_dynprog_modular(*parameter_list[0])

../examples/documented/python_modular/structure_hmsvm_bmrm.py

#!/usr/bin/env python

import numpy
import scipy

from scipy import io
data_dict = scipy.io.loadmat('../data/hmsvm_data_large_integer.mat')

parameter_list=[[data_dict]]

def structure_hmsvm_bmrm (m_data_dict=data_dict):
	from shogun.Features   import RealMatrixFeatures
	from shogun.Loss       import HingeLoss
	from shogun.Structure  import HMSVMLabels, HMSVMModel, Sequence, TwoStateModel, SMT_TWO_STATE
	from shogun.Evaluation import StructuredAccuracy
	from shogun.Structure  import DualLibQPBMSOSVM

	labels_array = m_data_dict['label'][0]

	idxs = numpy.nonzero(labels_array == -1)
	labels_array[idxs] = 0

	labels = HMSVMLabels(labels_array, 250, 500, 2)
	features = RealMatrixFeatures(m_data_dict['signal'].astype(float), 250, 500)

	loss = HingeLoss()
	model = HMSVMModel(features, labels, SMT_TWO_STATE, 4)

	sosvm = DualLibQPBMSOSVM(model, loss, labels, 5000.0)
	sosvm.train()

	print sosvm.get_w()

	predicted = sosvm.apply()
	evaluator = StructuredAccuracy()
	acc = evaluator.evaluate(predicted, labels)

	print('Accuracy = %.4f' % acc)

if __name__ == '__main__':
	print("HMSVM BMRM")
	structure_hmsvm_bmrm(*parameter_list[0])

../examples/documented/python_modular/structure_hmsvm_mosek.py

#!/usr/bin/env python

import numpy
import scipy

from scipy import io
data_dict = scipy.io.loadmat('../data/hmsvm_data_large_integer.mat')

parameter_list=[[data_dict]]

def structure_hmsvm_mosek (m_data_dict=data_dict):
	from shogun.Features   import RealMatrixFeatures
	from shogun.Loss       import HingeLoss
	from shogun.Structure  import HMSVMLabels, HMSVMModel, Sequence, TwoStateModel, SMT_TWO_STATE
	from shogun.Evaluation import StructuredAccuracy

	try:
		from shogun.Structure import PrimalMosekSOSVM
	except ImportError:
		print "Mosek not available"
		import sys
		sys.exit(0)

	labels_array = m_data_dict['label'][0]

	idxs = numpy.nonzero(labels_array == -1)
	labels_array[idxs] = 0

	labels = HMSVMLabels(labels_array, 250, 500, 2)
	features = RealMatrixFeatures(m_data_dict['signal'].astype(float), 250, 500)

	loss = HingeLoss()
	model = HMSVMModel(features, labels, SMT_TWO_STATE, 4)

	sosvm = PrimalMosekSOSVM(model, loss, labels)
	sosvm.train()
	print sosvm.get_w()

	predicted = sosvm.apply()
	evaluator = StructuredAccuracy()
	acc = evaluator.evaluate(predicted, labels)

	print('Accuracy = %.4f' % acc)

if __name__ == '__main__':
	print("HMSVM Mosek")
	structure_hmsvm_mosek(*parameter_list[0])

Tests

../examples/documented/python_modular/tests_check_commwordkernel_memleak_modular.py

#!/usr/bin/env python
parameter_list=[[10,7,0,0]]

def tests_check_commwordkernel_memleak_modular (num, order, gap, reverse):
	import gc
	from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA
	from shogun.Preprocessor import SortWordString, MSG_DEBUG
	from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer
	from numpy import mat

	POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']
	NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', 
	num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', 
	num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT']

	for i in range(10):
		alpha=Alphabet(DNA)
		traindat=StringCharFeatures(alpha)
		traindat.set_features(POS+NEG)
		trainudat=StringWordFeatures(traindat.get_alphabet());
		trainudat.obtain_from_char(traindat, order-1, order, gap, reverse)
		#trainudat.io.set_loglevel(MSG_DEBUG)
		pre = SortWordString()
		#pre.io.set_loglevel(MSG_DEBUG)
		pre.init(trainudat)
		trainudat.add_preprocessor(pre)
		trainudat.apply_preprocessor()
		spec = CommWordStringKernel(10, False)
		spec.set_normalizer(IdentityKernelNormalizer())
		spec.init(trainudat, trainudat)
		K=spec.get_kernel_matrix()

	del POS
	del NEG
	del order
	del gap
	del reverse
	return K

if __name__=='__main__':
	print('Leak Check Comm Word Kernel')
	tests_check_commwordkernel_memleak_modular(*parameter_list[0])

Transfer

../examples/documented/python_modular/transfer_multitask_clustered_logistic_regression.py

#!/usr/bin/env python
from numpy import array,hstack,sin,cos
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def transfer_multitask_clustered_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskClusteredLogisticRegression, MSG_DEBUG

	features = RealFeatures(hstack((traindat,sin(traindat),cos(traindat))))
	labels = BinaryLabels(hstack((label_train,label_train,label_train)))

	n_vectors = features.get_num_vectors()
	task_one = Task(0,n_vectors/3)
	task_two = Task(n_vectors/3,2*n_vectors/3)
	task_three = Task(2*n_vectors/3,n_vectors)
	task_group = TaskGroup()
	task_group.append_task(task_one)
	task_group.append_task(task_two)
	task_group.append_task(task_three)

	mtlr = MultitaskClusteredLogisticRegression(1.0,100.0,features,labels,task_group,2)
	mtlr.io.set_loglevel(MSG_DEBUG)
	mtlr.set_tolerance(1e-3) # use 1e-2 tolerance
	mtlr.set_max_iter(100)
	mtlr.train()
	mtlr.set_current_task(0)
	print mtlr.get_w()
	out = mtlr.apply_regression().get_labels()

	return out

if __name__=='__main__':
	print('TransferMultitaskClusteredLogisticRegression')
	transfer_multitask_clustered_logistic_regression(*parameter_list[0])

../examples/documented/python_modular/transfer_multitask_l12_logistic_regression.py

#!/usr/bin/env python
from numpy import array,hstack
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def transfer_multitask_l12_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskL12LogisticRegression

	features = RealFeatures(hstack((traindat,traindat)))
	labels = BinaryLabels(hstack((label_train,label_train)))

	n_vectors = features.get_num_vectors()
	task_one = Task(0,n_vectors/2)
	task_two = Task(n_vectors/2,n_vectors)
	task_group = TaskGroup()
	task_group.append_task(task_one)
	task_group.append_task(task_two)

	mtlr = MultitaskL12LogisticRegression(0.1,0.1,features,labels,task_group)
	mtlr.set_tolerance(1e-2) # use 1e-2 tolerance
	mtlr.set_max_iter(10)
	mtlr.train()
	mtlr.set_current_task(0)
	out = mtlr.apply_regression().get_labels()

	return out

if __name__=='__main__':
	print('TransferMultitaskL12LogisticRegression')
	transfer_multitask_l12_logistic_regression(*parameter_list[0])

../examples/documented/python_modular/transfer_multitask_leastsquares_regression.py

#!/usr/bin/env python
from numpy import array
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def transfer_multitask_leastsquares_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import RegressionLabels, RealFeatures, Task, TaskGroup, MultitaskLeastSquaresRegression

	features = RealFeatures(traindat)
	labels = RegressionLabels(label_train)

	n_vectors = features.get_num_vectors()
	task_one = Task(0,n_vectors/2)
	task_two = Task(n_vectors/2,n_vectors)
	task_group = TaskGroup()
	task_group.append_task(task_one)
	task_group.append_task(task_two)

	mtlsr = MultitaskLeastSquaresRegression(0.1,features,labels,task_group)
	mtlsr.set_regularization(1) # use regularization ratio
	mtlsr.set_tolerance(1e-2) # use 1e-2 tolerance
	mtlsr.train()
	mtlsr.set_current_task(0)
	out = mtlsr.apply_regression().get_labels()
	return out

if __name__=='__main__':
	print('TransferMultitaskLeastSquaresRegression')
	transfer_multitask_leastsquares_regression(*parameter_list[0])

../examples/documented/python_modular/transfer_multitask_logistic_regression.py

#!/usr/bin/env python
from numpy import array,hstack
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def transfer_multitask_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskLogisticRegression

	features = RealFeatures(hstack((traindat,traindat)))
	labels = BinaryLabels(hstack((label_train,label_train)))

	n_vectors = features.get_num_vectors()
	task_one = Task(0,n_vectors/2)
	task_two = Task(n_vectors/2,n_vectors)
	task_group = TaskGroup()
	task_group.append_task(task_one)
	task_group.append_task(task_two)

	mtlr = MultitaskLogisticRegression(0.1,features,labels,task_group)
	mtlr.set_regularization(1) # use regularization ratio
	mtlr.set_tolerance(1e-2) # use 1e-2 tolerance
	mtlr.train()
	mtlr.set_current_task(0)
	out = mtlr.apply().get_labels()

	return out

if __name__=='__main__':
	print('TransferMultitaskLogisticRegression')
	transfer_multitask_logistic_regression(*parameter_list[0])

../examples/documented/python_modular/transfer_multitask_trace_logistic_regression.py

#!/usr/bin/env python
from numpy import array,hstack
from numpy.random import seed, rand
from tools.load import LoadMatrix
lm=LoadMatrix()

traindat = lm.load_numbers('../data/fm_train_real.dat')
testdat = lm.load_numbers('../data/fm_test_real.dat')
label_traindat = lm.load_labels('../data/label_train_twoclass.dat')

parameter_list = [[traindat,testdat,label_traindat]]

def transfer_multitask_trace_logistic_regression (fm_train=traindat,fm_test=testdat,label_train=label_traindat):

	from modshogun import BinaryLabels, RealFeatures, Task, TaskGroup, MultitaskTraceLogisticRegression

	features = RealFeatures(hstack((traindat,traindat)))
	labels = BinaryLabels(hstack((label_train,label_train)))

	n_vectors = features.get_num_vectors()
	task_one = Task(0,n_vectors/2)
	task_two = Task(n_vectors/2,n_vectors)
	task_group = TaskGroup()
	task_group.append_task(task_one)
	task_group.append_task(task_two)

	mtlr = MultitaskTraceLogisticRegression(0.1,features,labels,task_group)
	mtlr.set_tolerance(1e-2) # use 1e-2 tolerance
	mtlr.set_max_iter(10)
	mtlr.train()
	mtlr.set_current_task(0)
	out = mtlr.apply_regression().get_labels()

	return out

if __name__=='__main__':
	print('TransferMultitaskTraceLogisticRegression')
	transfer_multitask_trace_logistic_regression(*parameter_list[0])