This page lists ready to run shogun examples for the Static Python interface.

To run the examples issue

python name_of_example.py

Classifier

../examples/documented/python_static/classifier_gpbtsvm.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm Gradient Projection Decomposition Technique
# (GPDT) is used with SVM regularization parameter C=1.2 and a Gaussian
# kernel of width 2.1 and 10MB of kernel cache. 
# 
# For more details on GPDT solver see http://dm.unife.it/gpdt 
#  
#    

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
		[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]

def classifier_gpbtsvm (fm_train_real=traindat,fm_test_real=testdat,
			label_train_twoclass=train_label,
			size_cache=10, width=2.1,C=1.2,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train_twoclass)
	sg('new_classifier', 'GPBTSVM')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('GPBTSVM')
	classifier_gpbtsvm(*parameter_list[0])

../examples/documented/python_static/classifier_knn.py

# This example shows usage of a k-nearest neighbor (KNN) classification rule on
# a toy data set. The number of the nearest neighbors is set to k=3 and the distances
# are measured by the Euclidean metric. Finally, the KNN rule is applied to predict
# labels of test examples. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,3],
		[traindat,testdat,train_label,4]]

def classifier_knn (fm_train_real=traindat,fm_test_real=testdat,
			label_train_multiclass=train_label,k=3):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_labels', 'TRAIN', label_train_multiclass)
	sg('set_distance', 'EUCLIDEAN', 'REAL')
	sg('new_classifier', 'KNN')
	sg('train_classifier', k)

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('KNN')
	classifier_knn(*parameter_list[0])

../examples/documented/python_static/classifier_lda.py

# In this example a linear two-class classifier is trained based on the Linear 
# Discriminant Analysis (LDA) from a toy 2-dimensional examples. The trained 
# LDA classifier is used to predict test examples. Note that the LDA classifier
# is optimal under the assumption that both classes are Gaussian distributed with equal
# co-variance. For more details on the LDA see e.g.
#              http://en.wikipedia.org/wiki/Linear_discriminant_analysis
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label],
		[traindat,testdat,train_label]]

def classifier_lda (fm_train_real=traindat,fm_test_real=testdat,
			label_train_twoclass=train_label):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_labels', 'TRAIN', label_train_twoclass)
	sg('new_classifier', 'LDA')
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	return result

if __name__=='__main__': 
	print('LDA')
	classifier_lda(*parameter_list[0])

../examples/documented/python_static/classifier_libsvm.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm LIBSVM is used with SVM regularization
# parameter C=1 and a Gaussian kernel of width 1.2 and 10MB of kernel cache and 
# the precision parameter epsilon=1e-5.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
		[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]

def classifier_libsvm (fm_train_real=traindat,fm_test_real=testdat,
			label_train_twoclass=train_label,
			size_cache=10, width=2.1,C=1.2,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
	sg('set_labels', 'TRAIN', label_train_twoclass)
	sg('new_classifier', 'LIBSVM')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('LibSVM')
	classifier_libsvm(*parameter_list[0])

../examples/documented/python_static/classifier_libsvmoneclass.py

# In this example a one-class support vector machine classifier is trained on a
# toy data set. The training algorithm finds a hyperplane in the RKHS which
# separates the training data from the origin. The one-class classifier is
# typically used to estimate the support of a high-dimesnional distribution. 
# For more details see e.g. 
#   B. Schoelkopf et al. Estimating the support of a high-dimensional
#   distribution. Neural Computation, 13, 2001, 1443-1471. 
# 
# In the example, the one-class SVM is trained by the LIBSVM solver with the
# regularization parameter C=1.2 and the Gaussian kernel of width 2.1 and the
# precision parameter epsilon=1e-5 and 10MB of the kernel cache.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ .
# 
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,10,2.1,10.,1e-5,False],
		[traindat,testdat,10,2.1,11.,1e-4,False]]

def classifier_libsvm_oneclass (fm_train_real=traindat,fm_test_real=testdat,
			size_cache=10, width=2.1,C=10.,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('new_classifier', 'LIBSVM_ONECLASS')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('LibSVMOneClass')
	classifier_libsvm_oneclass(*parameter_list[0])

../examples/documented/python_static/classifier_mpdsvm.py

# In this example a two-class support vector machine classifier is trained on a
# toy data set and the trained classifier is used to predict labels of test
# examples. As training algorithm the Minimal Primal Dual SVM is used with SVM
# regularization parameter C=1.2 and a Gaussian kernel of width 2.1 and 10MB of
# kernel cache and the precision parameter epsilon=1e-5.
# 
# For more details on the MPD solver see 
#  Kienzle, W. and B. Sch�lkopf: Training Support Vector Machines with Multiple
#  Equality Constraints. Machine Learning: ECML 2005, 182-193. (Eds.) Carbonell,
#  J. G., J. Siekmann, Springer, Berlin, Germany (11 2005)

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
		[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]

def classifier_mpdsvm (fm_train_real=traindat,fm_test_real=testdat,
			label_train_twoclass=train_label,
			size_cache=10, width=2.1,C=1.2,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train_twoclass)
	sg('new_classifier', 'MPDSVM')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('MPDSVM')
	classifier_mpdsvm(*parameter_list[0])

../examples/documented/python_static/classifier_perceptron.py

# This example shows how to use the Perceptron algorithm for training a
# two-class linear classifier, i.e.  y = sign( <x,w>+b). The Perceptron algorithm
# works by iteratively passing though the training examples and applying the
# update rule on those examples which are misclassified by the current
# classifier. The Perceptron update rule reads
# 
#   w(t+1) = w(t) + alpha * y_t * x_t
#   b(t+1) = b(t) + alpha * y_t
# 
# where (x_t,y_t) is feature vector and label (must be +1/-1) of the misclassified example
#       (w(t),b(t)) are the current parameters of the linear classifier
#       (w(t+1),b(t+1)) are the new parameters of the linear classifier
#       alpha is the learning rate. 
# 
# The Perceptron algorithm iterates until all training examples are correctly
# classified or the prescribed maximal number of iterations is reached. 
# 
# The learning rate and the maximal number of iterations can be set by
#   sg('set_perceptron_parameters', alpha, max_iter);
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat, train_label],
		[traindat,testdat,train_label]]

def classifier_perceptron (fm_train_real=traindat,fm_test_real=testdat,
			label_train_twoclass=train_label):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_labels', 'TRAIN', label_train_twoclass)
	sg('new_classifier', 'PERCEPTRON')
	# often does not converge, mind your data!
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	return result
if __name__=='__main__':
	print('Perceptron')
	classifier_perceptron(*parameter_list[0])

../examples/documented/python_static/classifier_svmlight.py

# In this example a two-class support vector machine classifier is trained on a
# DNA splice-site detection data set and the trained classifier is used to predict
# labels on test set. As training algorithm SVM^light is used with SVM
# regularization parameter C=1.2 and the Weighted Degree kernel of degree 20 and
# the precision parameter epsilon=1e-5.
# 
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.
# 
# For more details on the Weighted Degree kernel see
#  G. Raetsch, S.Sonnenburg, and B. Schoelkopf. RASE: recognition of alternatively
#  spliced exons in C. elegans. Bioinformatics, 21:369-377, June 2005. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
train_label=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna, train_label,10,20,1.2,1e-5,False],
		[traindna,testdna,train_label,10,21,1.3,1e-4,False]]

def classifier_svmlight (fm_train_dna=traindna,fm_test_dna=testdna,label_train_dna=train_label,
			size_cache=10, degree=20,C=1.2,
			epsilon=1e-5,use_bias=False):


	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)

	sg('set_labels', 'TRAIN', label_train_dna)

	try:
		sg('new_classifier', 'SVMLIGHT')
	except RuntimeError:
		return

	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('SVMLight')
	classifier_svmlight(*parameter_list[0])

Clustering

../examples/documented/python_static/clustering_hierarchical.py

# In this example an agglomerative hierarchical single linkage clustering method
# is used to cluster a given toy data set. Starting with each object being
# assigned to its own cluster clusters are iteratively merged. Here the clusters
# are merged that have the closest (minimum distance, here set via the Euclidean
# distance object) two elements.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
parameter_list=[[traindat,10,3],[traindat,11,4]]
def clustering_hierarchical (fm_train=traindat, size_cache=10,merges=3):

	sg('set_features', 'TRAIN', fm_train)
	sg('set_distance', 'EUCLIDEAN', 'REAL')
	sg('new_clustering', 'HIERARCHICAL')
	sg('train_clustering', merges)

	[merge_distance, pairs]=sg('get_clustering')
	return [merge_distance, pairs]

if __name__=='__main__':
	print('Hierarchical')
	clustering_hierarchical(*parameter_list[0])

../examples/documented/python_static/clustering_kmeans.py

# In this example the k-means clustering method is used to cluster a given toy
# data set. In k-means clustering one tries to partition n observations into k
# clusters in which each observation belongs to the cluster with the nearest mean.
# The algorithm class constructor takes the number of clusters and a distance to
# be used as input. The distance used in this example is Euclidean distance.
# After training one can fetch the result of clustering by obtaining the cluster
# centers and their radiuses.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
parameter_list=[[traindat,10,3,1000],[traindat,11,4,1500]]

def clustering_kmeans (fm_train=traindat, size_cache=10,k=3,iter=1000):
	sg('set_features', 'TRAIN', fm_train)
	sg('set_distance', 'EUCLIDEAN', 'REAL')
	sg('new_clustering', 'KMEANS')
	sg('train_clustering', k, iter)

	[radi, centers]=sg('get_clustering')
	return [radi, centers]

if __name__=='__main__':
	print('KMeans')
	clustering_kmeans(*parameter_list[0])

Distance

../examples/documented/python_static/distance_braycurtis.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL') 
# from different files and initializes the distance to 'BRAYCURTIS'. 
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by 
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and 
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between 
# these two matrices is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist. 
# 
# For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
# 
# Obviously, using the Bray Curtis distance is not limited to this showcase 
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_braycurtis (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'BRAYCURTIS', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('BrayCurtisDistance')
	distance_braycurtis(*parameter_list[0])

../examples/documented/python_static/distance_canberra.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CANBERRA'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (dissimilarity ratio) matrix is 
# computed by 'get_distance_matrix'. 
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (dissimilarity ratio)
# matrix between these two data sets is computed by 'get_distance_matrix'. 
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and 
# target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CCanberraMetric.html.
# 
# Obviously, using the Canberra distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_canberra (fm_train_real=traindat,fm_test_real=testdat):

	sg('set_distance', 'CANBERRA', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('CanberraMetric')
	distance_canberra(*parameter_list[0])

../examples/documented/python_static/distance_canberraword.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and 
# initializes the distance to 'CANBERRA' with feature type 'WORD'.
# 
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
# 
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by 
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to 
# a string representation suitable for the selected distance. The individual 
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance 
# matrix between these two data sets by 'get_distance_matrix', we have to 
# perform two preprocessing steps for input data 'TEST'. The method 'convert' 
# transforms the input data 'TEST' to a string representation suitable for 
# the selected distance. The individual strings are sorted in ascending order 
# after the execution of 'attach_preproc'. A pairwise distance matrix between 
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see 
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CCanberraWordDistance.html.
# 
# Obviously, using the Canberra word distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]

def distance_canberraword (fm_train_dna=traindna,fm_test_dna=testdna,order=3,
			    gap=0,reverse='n'):

	sg('set_distance', 'CANBERRA', 'WORD')
	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('CanberraWordDistance')
	distance_canberraword(*parameter_list[0])

../examples/documented/python_static/distance_chebyshew.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CHEBYSHEW'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix (maximum of absolute feature
# dimension differences) is computed by 'get_distance_matrix'. 
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix (maximum 
# of absolute feature dimension differences) between these two data sets is 
# computed. 
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist. 
# 
# For more details see doc/classshogun_1_1CChebyshewMetric.html.
# 
# Obviously, using the Chebyshew distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_chebyshew (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'CHEBYSHEW', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('ChebyshewMetric')
	distance_chebyshew(*parameter_list[0])

../examples/documented/python_static/distance_chisquare.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'CHISQUARE'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by 
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two matrices is computed by 'get_distance_matrix'. 
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' 
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CChiSquareDistance.html.
# 
# Obviously, using the ChiSquare distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_chisquare (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'CHISQUARE', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('ChiSquareDistance')
	distance_chisquare(*parameter_list[0])

../examples/documented/python_static/distance_cosine.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'COSINE'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CCosineDistance.html.
# 
# Obviously, using the Cosine distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_cosine (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'COSINE', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('CosineDistance')
	distance_cosine(*parameter_list[0])

../examples/documented/python_static/distance_euclidian.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'EUCLIDIAN'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CEuclidianDistance.html.
# 
# Obviously, using the Euclidian distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_euclidean (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'EUCLIDEAN', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('EuclideanDistance')
	distance_euclidean(*parameter_list[0])

../examples/documented/python_static/distance_geodesic.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'GEODESIC'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (shortest path on a sphere) matrix is 
# computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (shortest path on 
# a sphere) matrix between these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CGeodesicMetric.html.
# 
# Obviously, using the Geodesic distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_geodesic (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'GEODESIC', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('GeodesicMetric')
	distance_geodesic(*parameter_list[0])

../examples/documented/python_static/distance_hammingword.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and
# initializes the distance to 'HAMMING' with feature type 'WORD'.
# 
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
# 
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to
# a string representation suitable for the selected distance. The individual
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
# matrix between these two data sets by 'get_distance_matrix', we have to
# perform two preprocessing steps for input data 'TEST'. The method 'convert'
# transforms the input data 'TEST' to a string representation suitable for
# the selected distance. The individual strings are sorted in ascending order
# after the execution of 'attach_preproc'. A pairwise distance matrix between
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CHammingWordDistance.html.
# 
# Obviously, using the Hamming word distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]

def distance_hammingword (fm_train_dna=traindna,fm_test_dna=testdna,order=3,
			    gap=0,reverse='n'):

	sg('set_distance', 'HAMMING', 'WORD')
	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('HammingWordDistance')
	distance_hammingword(*parameter_list[0])

../examples/documented/python_static/distance_jensen.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'JENSEN'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (divergence measure based on the 
# Kullback-Leibler divergence) matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (divergence measure 
# based on the Kullback-Leibler divergence) matrix between these two data sets 
# is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CJensenMetric.html.
# 
# Obviously, using the Jensen-Shannon distance/divergence is not limited to 
# this showcase example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_jensen (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'JENSEN', 'REAL')
	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')
	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('JensenMetric')
	distance_jensen(*parameter_list[0])

../examples/documented/python_static/distance_manhatten.py

# n approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'MANHATTAN'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (sum of absolute feature
# dimension differences) matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (sum of absolute 
# feature dimension differences) matrix between these two data sets is
# computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CManhattanMetric.html.
# 
# Obviously, using the Manhattan distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_manhatten (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'MANHATTAN', 'REAL')

	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('ManhattanMetric')
	distance_manhatten(*parameter_list[0])

../examples/documented/python_static/distance_manhattenword.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored data sets in 'STRING' representation
# (feature type 'CHAR' with alphabet 'DNA') from different files and
# initializes the distance to 'MANHATTAN' with feature type 'WORD'.
# 
# Data points in this example are defined by the transformation function
# 'convert' and the preprocessing step applied afterwards (defined by
# 'add_preproc' and preprocessor 'SORTWORDSTRING').
# 
# The target 'TRAIN' for 'set_features' controls the binding of the given
# data points. In order to compute a pairwise distance matrix by
# 'get_distance_matrix', we have to perform two preprocessing steps for
# input data 'TRAIN'. The method 'convert' transforms the input data to
# a string representation suitable for the selected distance. The individual
# strings are sorted in ascending order after the execution of 'attach_preproc'.
# A pairwise distance matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the binding of the given
# data points 'TRAIN' and 'TEST'. In order to compute a pairwise distance
# matrix between these two data sets by 'get_distance_matrix', we have to
# perform two preprocessing steps for input data 'TEST'. The method 'convert'
# transforms the input data 'TEST' to a string representation suitable for
# the selected distance. The individual strings are sorted in ascending order
# after the execution of 'attach_preproc'. A pairwise distance matrix between
# the data sets 'TRAIN' and 'TEST' is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see
# doc/classshogun_1_1CSortWordString.html,
# doc/classshogun_1_1CPreprocessor.html,
# doc/classshogun_1_1CStringFeatures.html (method obtain_from_char_features) and
# doc/classshogun_1_1CManhattanWordDistance.html.
# 
# Obviously, using the Manhattan word distance is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,0,'n'],[traindna,testdna,4,0,'n']]

def distance_manhattenword (fm_train_dna=traindna,fm_test_dna=testdna,order=3,
			    gap=0,reverse='n'):
	sg('set_distance', 'MANHATTAN', 'WORD')
	sg('add_preproc', 'SORTWORDSTRING')

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')
	dm=sg('get_distance_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('ManhattanWordDistance')
	distance_manhattenword(*parameter_list[0])

../examples/documented/python_static/distance_minkowski.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'MINKOWSKI' with
# norm 'k'. Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance matrix is computed by
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance matrix between
# these two data sets is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist.
# 
# For more details see doc/classshogun_1_1CMinkowskiMetric.html.
# 
# Obviously, using the Minkowski metric is not limited to this showcase
# example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,3.],[traindat,testdat,4.]]
def distance_minkowski (fm_train_real=traindat,fm_test_real=testdat,k=3.):
	sg('set_distance', 'MINKOWSKI', 'REAL', k)

	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('MinkowskiMetric')
	distance_minkowski(*parameter_list[0])

../examples/documented/python_static/distance_tanimoto.py

# An approach as applied below, which shows the processing of input data
# from a file becomes a crucial factor for writing your own sample applications.
# This approach is just one example of what can be done using the distance
# functions provided by shogun.
# 
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
# 
# This example loads two stored matrices of real values (feature type 'REAL')
# from different files and initializes the distance to 'TANIMOTO'.
# Each column of the matrices corresponds to one data point.
# 
# The target 'TRAIN' for 'set_features' controls the processing of the given
# data points, where a pairwise distance (extended Jaccard coefficient)
# matrix is computed by 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix' and
# target 'TRAIN'.
# 
# The target 'TEST' for 'set_features' controls the processing of the given
# data points 'TRAIN' and 'TEST', where a pairwise distance (extended
# Jaccard coefficient) matrix between these two data sets is computed by
# 'get_distance_matrix'.
# 
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'
# and target 'TEST'. The 'TRAIN' distance matrix ceased to exist. 
# 
# For more details see doc/classshogun_1_1CTanimotoDistance.html.
# 
# Obviously, using the Tanimoto distance/coefficient is not limited to
# this showcase example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat],[traindat,testdat]]

def distance_tanimoto (fm_train_real=traindat,fm_test_real=testdat):
	sg('set_distance', 'TANIMOTO', 'REAL')

	sg('set_features', 'TRAIN', fm_train_real)
	dm=sg('get_distance_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	dm=sg('get_distance_matrix', 'TEST')
	return dm

if __name__=='__main__':
	print('TanimotoDistance')
	distance_tanimoto(*parameter_list[0])

Distribution

../examples/documented/python_static/distribution_histogram.py

# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,0,'n'],[traindna,cubedna,4,0,'n']]

def distribution_histogram(fm_train=traindna,fm_cube=cubedna,order=3,
			    gap=0,reverse='n'):

#	sg('new_distribution', 'HISTOGRAM')
	sg('add_preproc', 'SORTWORDSTRING')

	sg('set_features', 'TRAIN', fm_train, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')
#	sg('train_distribution')
#	histo=sg('get_histogram')

#	num_examples=11
#	num_param=sg('get_histogram_num_model_parameters')
#	for i in xrange(num_examples):
#		for j in xrange(num_param):
#			sg('get_log_derivative %d %d' % (j, i))

#	sg('get_log_likelihood')
#	return sg('get_log_likelihood_sample')

if __name__=='__main__':
	print('Histogram')
	distribution_histogram(*parameter_list[0])

../examples/documented/python_static/distribution_hmm.py

# In this example a hidden markov model with 3 states and 6 transitions is trained
# on a string data set.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,6,1,list(),list()],
		[traindna,cubedna,3,6,1,list(),list()]]

def distribution_hmm(fm_train=traindna,fm_cube=cubedna,N=3,M=6,
			   order=1,hmms=list(),links=list()):
	sg('new_hmm',N, M)
	sg('set_features', 'TRAIN', fm_cube, 'CUBE')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order)
	sg('bw')
	hmm=sg('get_hmm')

	sg('new_hmm', N, M)
	sg('set_hmm', hmm[0], hmm[1], hmm[2], hmm[3])
	likelihood=sg('hmm_likelihood')
	return likelihood

if __name__=='__main__':
	print('HMM')
	distribution_hmm(*parameter_list[0])

../examples/documented/python_static/distribution_linearhmm.py

# Trains an inhomogeneous Markov chain of order 3 on a DNA string data set. Due to
# the structure of the Markov chain it is very similar to a HMM with just one
# chain of connected hidden states - that is why we termed this linear HMM.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()
traindna=lm.load_dna('../data/fm_train_dna.dat')
cubedna=lm.load_cubes('../data/fm_train_cube.dat')
parameter_list=[[traindna,cubedna,3,0,'n'],
		[traindna,cubedna,3,0,'n']]

def distribution_linearhmm (fm_train=traindna,fm_cube=cubedna,
			   order=3,gap=0,reverse='n'):
#	sg('new_distribution', 'LinearHMM')
	sg('add_preproc', 'SORTWORDSTRING')

	sg('set_features', 'TRAIN', fm_train, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

#	sg('train_distribution')
#	histo=sg('get_histogram')

#	num_examples=11
#	num_param=sg('get_histogram_num_model_parameters')
#	for i in xrange(num_examples):
#		for j in xrange(num_param):
#			sg('get_log_derivative %d %d' % (j, i))

#	sg('get_log_likelihood_sample')


if __name__=='__main__':
	print('LinearHMM')
	distribution_linearhmm(*parameter_list[0])

Kernel

../examples/documented/python_static/kernel_chi2.py

# This is an example for the initialization of the chi2-kernel on real data, where 
# each column of the matrices corresponds to one training/test example. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.5,11]]

def kernel_chi2 (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,size_cache=10):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'CHI2', 'REAL', size_cache, width)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Chi2')
	kernel_chi2(*parameter_list[0])

../examples/documented/python_static/kernel_combined.py

# This is an example for the initialization of a combined kernel, which is a weighted sum of 
# in this case three kernels on real valued data. The sub-kernel weights are all set to 1. 
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.,10],[traindat,testdat,1.5,11]]

def kernel_combined(fm_train_real=traindat,fm_test_real=testdat,
		 weight=1.,size_cache=10):
	sg('clean_kernel')
	sg('clean_features', 'TRAIN')
	sg('clean_features', 'TEST')
	sg('set_kernel', 'COMBINED', size_cache)
	sg('add_kernel', weight, 'LINEAR', 'REAL', size_cache)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', size_cache, 1.)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)
	sg('add_kernel', weight, 'POLY', 'REAL', size_cache, 3, False)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)

	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Combined')
	kernel_combined(*parameter_list[0])

../examples/documented/python_static/kernel_commulongstring.py

# This is an example for the initialization of the CommUlongString-kernel. This kernel 
# sums over k-mere matches (k='order'). For efficient computing a preprocessor is used 
# that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mere is counted 
# only once. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
		[traindna,testdna,11,4,0,'n',False,'FULL']]

def kernel_commulongstring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10,
			    order=3,gap=0,reverse='n',
			    use_sign=False,normalization='FULL'):
	sg('add_preproc', 'SORTULONGSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('CommUlongString')
	kernel_commulongstring(*parameter_list[0])

../examples/documented/python_static/kernel_commwordstring.py

# This is an example for the initialization of the CommWordString-kernel (aka
# Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel 
# sums over k-mere matches (k='order'). For efficient computing a preprocessor is used 
# that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mere is counted 
# only once. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
		[traindna,testdna,11,4,0,'n',False,'FULL']]

def kernel_commwordstring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10,
			    order=3,gap=0,reverse='n',
			    use_sign=False,normalization='FULL'):

	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('CommWordString')
	kernel_commwordstring(*parameter_list[0])

../examples/documented/python_static/kernel_const.py

# The constant kernel gives a trivial kernel matrix with all entries set to the same value 
# defined by the argument 'c'. 
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,23.,10],[traindat,testdat,24.,11]]

def kernel_const (fm_train_real=traindat,fm_test_real=testdat,c=23.,size_cache=10):
	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'CONST', 'REAL', size_cache, c)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Const')
	kernel_const(*parameter_list[0])

../examples/documented/python_static/kernel_diag.py

# This is an example for the initialization of the diag-kernel. 
# The diag kernel has all kernel matrix entries but those on 
# the main diagonal set to zero. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,23.,10],[traindat,testdat,24.,11]]

def kernel_diag (fm_train_real=traindat,fm_test_real=testdat,diag=23.,
			 size_cache=10):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'DIAG', 'REAL', size_cache, diag)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Diag')
	kernel_diag(*parameter_list[0])

../examples/documented/python_static/kernel_fixeddegreestring.py

# The FixedDegree String kernel takes as input two strings of same size and counts the number of matches of length d.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,3,10],[traindna,testdna,4,11]]

def kernel_fixeddegreestring (fm_train_dna=traindna,fm_test_dna=testdna,degree=3,
			    size_cache=10):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'FIXEDDEGREE', 'CHAR', size_cache, degree)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('FixedDegreeString')
	kernel_fixeddegreestring(*parameter_list[0])

../examples/documented/python_static/kernel_gaussian.py

# The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.9,11]]

def kernel_gaussian (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,size_cache=10):
	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Gaussian')
	kernel_gaussian(*parameter_list[0])

../examples/documented/python_static/kernel_gaussianshift.py

# An experimental kernel inspired by the WeightedDegreePositionStringKernel and the Gaussian kernel.
# The idea is to shift the dimensions of the input vectors against eachother. 'shift_step' is the step 
# size of the shifts and  max_shift is the maximal shift.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.9,2,1,10],[traindat,testdat,1.5,2,1,11]]

def kernel_gaussianshift (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,max_shift=2,shift_step=1,size_cache=10):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', size_cache, width, max_shift, shift_step)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('GaussianShift')
	kernel_gaussianshift(*parameter_list[0])

../examples/documented/python_static/kernel_linear.py

# This is an example for the initialization of a linear kernel on real valued 
# data using scaling factor 1.2. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.2,10],[traindat,testdat,1.5,11]]

def kernel_linear (fm_train_real=traindat,fm_test_real=testdat,
		scale=1.2,size_cache=10):

	from sg import sg
	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'LINEAR', 'REAL', size_cache, scale)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Linear')
	kernel_linear(*parameter_list[0])

../examples/documented/python_static/kernel_linearstring.py

# This is an example for the initialization of a linear kernel on string data. The 
# strings are all of the same length and consist of the characters 'ACGT' corresponding 
# to the DNA-alphabet. Each column of the matrices of type char corresponds to 
# one training/test example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10],
		[traindna,testdna,11]]

def kernel_linearstring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'LINEAR', 'CHAR', size_cache)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('LinearString')
	kernel_linearstring(*parameter_list[0])

../examples/documented/python_static/kernel_linearword.py

# This is an example for the initialization of a linear kernel on word (2byte) 
# data. 

from tools.load import LoadMatrix
from numpy import ushort
from sg import sg
lm=LoadMatrix()

trainword=ushort(lm.load_numbers('../data/fm_test_word.dat'))
testword=ushort(lm.load_numbers('../data/fm_test_word.dat'))
parameter_list=[[trainword,testword,10,1.4],
	       [trainword,testword,11,1.5]]

def kernel_linearword (fm_train_word=trainword,fm_test_word=testword,
		       size_cache=10, scale=1.4):
	sg('set_features', 'TRAIN', fm_train_word)
	sg('set_features', 'TEST', fm_test_word)
	sg('set_kernel', 'LINEAR', 'WORD', size_cache, scale)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('LinearWord')
	kernel_linearword(*parameter_list[0])

../examples/documented/python_static/kernel_localalignmentstring.py

# This is an example for the initialization of the local alignment kernel on 
# DNA sequences, where each column of the matrices of type char corresponds to 
# one training/test example. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10],
		[traindna,testdna,11]]

def kernel_localalignmentstring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', size_cache)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('LocalAlignmentString')
	kernel_localalignmentstring(*parameter_list[0])

../examples/documented/python_static/kernel_localityimprovedstring.py

# This example initializes the locality improved string kernel. The locality improved string 
# kernel is defined on sequences of the same length and inspects letters matching at 
# corresponding positions in both sequences. The kernel sums over all matches in windows of 
# length l and takes this sum to the power of 'inner_degree'. The sum over all these 
# terms along the sequence is taken to the power of 'outer_degree'. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,5,5,7],
		[traindna,testdna,trainlabel,11,6,6,8]]

def kernel_localityimprovedstring (fm_train_dna=traindna,fm_test_dna=testdna,
				 label_train_dna=trainlabel,size_cache=10,
				 length=5,inner_degree=5,outer_degree=7):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'LIK', 'CHAR', size_cache, length, inner_degree, outer_degree)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('LocalityImprovedString')
	kernel_localityimprovedstring(*parameter_list[0])

../examples/documented/python_static/kernel_oligostring.py

# This is an example initializing the oligo string kernel which takes distances 
# between matching oligos (k-mers) into account via a gaussian. Variable 'k' defines the length 
# of the oligo and variable 'w' the width of the gaussian. The oligo string kernel is 
# implemented for the DNA-alphabet 'ACGT'. 
#  

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,1.2],
		[traindna,testdna,11,4,1.3]]

def kernel_oligostring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10,k=3,width=1.2):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'OLIGO', 'CHAR', size_cache, k, width)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('OligoString')
	kernel_oligostring(*parameter_list[0])

../examples/documented/python_static/kernel_pluginestimatehistogram.py

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,3,0,'n'],
		[traindna,testdna,trainlabel,11,4,0,'n']]

def kernel_pluginestimatehistogram (fm_train_dna=traindna,fm_test_dna=testdna,
				 label_train_dna=trainlabel,size_cache=10,
				 order=3,gap=0,reverse='n',):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)

	pseudo_pos=1e-1
	pseudo_neg=1e-1
	sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
	sg('set_labels', 'TRAIN', label_train_dna)
	sg('train_estimator')

	sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache)
	km=sg('get_kernel_matrix', 'TRAIN')

# not supported yet
#	lab=sg('plugin_estimate_classify')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('PluginEstimate w/ HistogramWord')
	kernel_pluginestimatehistogram(*parameter_list[0])

../examples/documented/python_static/kernel_poly.py

# This example initializes the polynomial kernel with real data. 
# If variable 'inhomogene' is 'true' +1 is added to the scalar product 
# before taking it to the power of 'degree'. If 'use_normalization' is 
# set to 'true' then kernel matrix will be normalized by the square roots
# of the diagonal entries. 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,4,False,True,10],
		[traindat,testdat,5,False,True,11]]

def kernel_poly (fm_train_real=traindat,fm_test_real=testdat,
		 degree=4,inhomogene=False,use_normalization=True,size_cache=10):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'POLY', 'REAL', size_cache, degree, inhomogene, use_normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Poly')
	kernel_poly(*parameter_list[0])

../examples/documented/python_static/kernel_polymatchstring.py

# This is an example for the initialization of the PolyMatchString kernel on string data. 
# The PolyMatchString kernel sums over the matches of two stings of the same length and 
# takes the sum to the power of 'degree'. The strings consist of the characters 'ACGT' corresponding 
# to the DNA-alphabet. Each column of the matrices of type char corresponds to 
# one training/test example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,False],
		[traindna,testdna,11,4,False]]

def kernel_polymatchstring (fm_train_dna=traindna,fm_test_dna=testdna,
			    size_cache=10,degree=3,inhomogene=False):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'POLYMATCH', 'CHAR', size_cache, degree, inhomogene)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('PolyMatchString')
	kernel_polymatchstring(*parameter_list[0])

../examples/documented/python_static/kernel_polymatchword.py

# The PolyMatchWordString kernel is defined on strings of equal length. 
# The kernel sums over the matches of two stings of the same length and 
# takes the sum to the power of 'degree'. The strings in this example 
# consist of the characters 'ACGT' corresponding to the DNA-alphabet. Each 
# column of the matrices of type char corresponds to one training/test example.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,2,True,True,3,0,'n'],
		[traindna,testdna,trainlabel,11,3,True,True,4,0,'n']]

def kernel_polymatchword (fm_train_dna=traindna,fm_test_dna=testdna,
				   label_train_dna=trainlabel,size_cache=10,
				   degree=2,inhomogene=True,normalize=True,
				   order=3,gap=0,reverse='n'):

	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'POLYMATCH', 'WORD', size_cache, degree, inhomogene, normalize)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('PolyMatchWord')
	kernel_polymatchword(*parameter_list[0])

../examples/documented/python_static/kernel_salzbergstring.py

# The SalzbergWordString kernel implements the Salzberg kernel.
# 
# It is described in
# 
# Engineering Support Vector Machine Kernels That Recognize Translation Initiation Sites
# A. Zien, G.Raetsch, S. Mika, B. Schoelkopf, T. Lengauer, K.-R. Mueller
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,3,0,'n',False,'FULL'],
		[traindna,testdna,trainlabel,11,4,0,'n',False,'FULL']]

def kernel_salzbergstring (fm_train_dna=traindna,fm_test_dna=testdna,
				   label_train_dna=trainlabel,size_cache=10,
				   order=3,gap=0,reverse='n',use_sign=False,
				   normalization='FULL'):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)

	pseudo_pos=1e-1
	pseudo_neg=1e-1
	sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
	sg('set_labels', 'TRAIN', label_train_dna)
	sg('train_estimator')

	sg('set_kernel', 'SALZBERG', 'WORD', size_cache)
	#sg('set_prior_probs', 0.4, 0.6)
	sg('set_prior_probs_from_labels', label_train_dna)
	km=sg('get_kernel_matrix', 'TRAIN')

# not supported yet
#	lab=sg('plugin_estimate_classify')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('PluginEstimate w/ SalzbergWord')
	kernel_salzbergstring(*parameter_list[0])

../examples/documented/python_static/kernel_sigmoid.py

# The standard Sigmoid kernel computed on dense real valued features.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,11,1.2,1.3,10],[traindat,testdat,12,1.3,1.4,11]]

def kernel_sigmoid (fm_train_real=traindat,fm_test_real=testdat,
		 num_feats=11,gamma=1.2,coef0=1.3,size_cache=10):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_features', 'TEST', fm_test_real)
	sg('set_kernel', 'SIGMOID', 'REAL', size_cache, gamma, coef0)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('Sigmoid')
	kernel_sigmoid(*parameter_list[0])

../examples/documented/python_static/kernel_simplelocalityimprovedstring.py

# SimpleLocalityImprovedString kernel, is a ``simplified'' and better performing version of the Locality improved kernel.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,5,5,7],
		[traindna,testdna,trainlabel,11,6,6,8]]

def kernel_simplelocalityimprovedstring (fm_train_dna=traindna,fm_test_dna=testdna,
				 label_train_dna=trainlabel,size_cache=10,
				 length=5,inner_degree=5,outer_degree=7):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'SLIK', 'CHAR', size_cache, length, inner_degree, outer_degree)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('SimpleLocalityImprovedString')
	kernel_simplelocalityimprovedstring(*parameter_list[0])

../examples/documented/python_static/kernel_weightedcommwordstring.py

# The WeightedCommWordString kernel may be used to compute the weighted
# spectrum kernel (i.e. a spectrum kernel for 1 to K-mers, where each k-mer
# length is weighted by some coefficient \f$\beta_k\f$) from strings that have
# been mapped into unsigned 16bit integers.
# 
# These 16bit integers correspond to k-mers. To applicable in this kernel they
# need to be sorted (e.g. via the SortWordString pre-processor).
# 
# It basically uses the algorithm in the unix "comm" command (hence the name)
# to compute:
# 
# k({\bf x},({\bf x'})= \sum_{k=1}^K\beta_k\Phi_k({\bf x})\cdot \Phi_k({\bf x'})
# 
# where \f$\Phi_k\f$ maps a sequence \f${\bf x}\f$ that consists of letters in
# \f$\Sigma\f$ to a feature vector of size \f$|\Sigma|^k\f$. In this feature
# vector each entry denotes how often the k-mer appears in that \f${\bf x}\f$.
# 
# Note that this representation is especially tuned to small alphabets
# (like the 2-bit alphabet DNA), for which it enables spectrum kernels
# of order 8.
# 
# For this kernel the linadd speedups are quite efficiently implemented using
# direct maps.
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
trainlabel=lm.load_labels('../data/label_train_dna.dat')
parameter_list=[[traindna,testdna,trainlabel,10,3,0,'n',False,'FULL'],
		[traindna,testdna,trainlabel,11,4,0,'n',False,'FULL']]

def kernel_weightedcommwordstring (fm_train_dna=traindna,fm_test_dna=testdna,
				   label_train_dna=trainlabel,size_cache=10,
				   order=3,gap=0,reverse='n',use_sign=False,
				   normalization='FULL'):

	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', size_cache, use_sign, normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('WeightedCommWordString')
	kernel_weightedcommwordstring(*parameter_list[0])

../examples/documented/python_static/kernel_weighteddegreepositionstring.py

# The Weighted Degree Position String kernel (Weighted Degree kernel with shifts).
# 
# The WD-shift kernel of order d compares two sequences X and
# Y of length L by summing all contributions of k-mer matches of
# lengths k in 1...d, weighted by coefficients beta_k
# allowing for a positional tolerance of up to shift s.
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,20],
		[traindna,testdna,11,21]]

def kernel_weighteddegreepositonstring (fm_train_dna=traindna,fm_test_dna=testdna,
			size_cache=10,degree=20):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', size_cache, degree)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('WeightedDegreePositionString')
	kernel_weighteddegreepositonstring(*parameter_list[0])

../examples/documented/python_static/kernel_weighteddegreestring.py

# The Weighted Degree String kernel.
# 
# The WD kernel of order d compares two sequences X and
# Y of length L by summing all contributions of k-mer matches of
# lengths k in 1...d , weighted by coefficients beta_k. It
# is defined as
# 
#     k(X, Y)=\sum_{k=1}^d\beta_k\sum_{l=1}^{L-k+1}I(u_{k,l}(X)=u_{k,l}(Y)).
# 
# Here, $u_{k,l}(X)$ is the string of length k starting at position
# l of the sequence X and I(.) is the indicator function
# which evaluates to 1 when its argument is true and to 0
# otherwise.
# 

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,20],
		[traindna,testdna,11,21]]

def kernel_weighteddegreestring (fm_train_dna=traindna,fm_test_dna=testdna,
		size_cache=10,degree=20):

	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', size_cache, degree)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('WeightedDegreeString')
	kernel_weighteddegreestring(*parameter_list[0])

Mkl

../examples/documented/python_static/mkl_multiclass.py

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
trainlabel=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat,trainlabel,10,1.2,1.2,1e-5,0.001,1.5,1.0],
		[traindat,testdat,trainlabel,11,1.3,1.3,1e-5,0.002,1.6,1.1]]

def mkl_multiclass (fm_train_real=traindat,fm_test_real=testdat,
		label_train_multiclass=trainlabel,
		size_cache=10,width=1.2,C=1.2,epsilon=1e-5,
		mkl_eps=0.001,mkl_norm=1.5,weight=1.0):

	sg('clean_kernel')
	sg('clean_features', 'TRAIN')
	sg('clean_features', 'TEST')
	sg('set_kernel', 'COMBINED', size_cache)
	sg('add_kernel', weight, 'LINEAR', 'REAL', size_cache)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', size_cache, width)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)
	sg('add_kernel', weight, 'POLY', 'REAL', size_cache, 2)
	sg('add_features', 'TRAIN', fm_train_real)
	sg('add_features', 'TEST', fm_test_real)

	sg('set_labels', 'TRAIN', label_train_multiclass)
	sg('new_classifier', 'MKL_MULTICLASS')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('mkl_parameters', mkl_eps, 0.0, mkl_norm)
	sg('train_classifier')

	#sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('mkl_multiclass')
	mkl_multiclass(*parameter_list[0])

../examples/documented/python_static/mkl_regression.py

from tools.load import LoadMatrix
from sg import sg
from numpy import *
num=100
labelstrain=concatenate((-ones([1,num]), ones([1,num])),1)[0]
featuretrain=concatenate((random.normal(size=(2,num))-1,random.normal(size=(2,num))+1),1)
parameter_list=[[1.,labelstrain,featuretrain,1e-2],
                [1.,labelstrain,featuretrain,1e-2]]

def mkl_regression (weight=1.,
		labels=labelstrain,features=featuretrain,
		tube_epsilon=1e-2):

	sg('new_classifier', 'MKL_REGRESSION')
	sg('c', 1.)
	sg('svr_tube_epsilon', tube_epsilon)

	sg('set_labels', 'TRAIN', labels)
	sg('add_features', 'TRAIN', features)
	sg('add_features', 'TRAIN', features)
	sg('add_features', 'TRAIN', features)

	sg('set_kernel', 'COMBINED', 100)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)
	sg('train_classifier')
	[bias, alphas]=sg('get_svm');

	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('MKL_REGRESSION')
	mkl_regression(*parameter_list[0])

../examples/documented/python_static/mkl_twoclass.py

from tools.load import LoadMatrix
from sg import sg
from numpy import *
num=100
labelstrain=concatenate((-ones([1,num]), ones([1,num])),1)[0]
featuretrain=concatenate((random.normal(size=(2,num))-1,random.normal(size=(2,num))+1),1)
parameter_list=[[1.,labelstrain,featuretrain],
				[1.,labelstrain,featuretrain]]

def mkl_twoclass (weight=1.,
		labels=labelstrain,features=featuretrain):

	sg('c', 10.)
	sg('new_classifier', 'MKL_CLASSIFICATION')

	sg('set_labels', 'TRAIN', labels)
	sg('add_features', 'TRAIN', features)
	sg('add_features', 'TRAIN', features)
	sg('add_features', 'TRAIN', features)

	sg('set_kernel', 'COMBINED', 100)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
	sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)
	sg('train_classifier')
	[bias, alphas]=sg('get_svm');

	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('MKL_TWOCLASS')
	mkl_twoclass(*parameter_list[0])

Multiclass

../examples/documented/python_static/multiclass_gmnpsvm.py

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,1.2,1e-5,False],
		[traindat,testdat,train_label,10,2.1,1.3,1e-4,False]]

def classifier_gmnpsvm (fm_train_real=traindat,fm_test_real=testdat,
			label_train_multiclass=train_label,
			size_cache=10, width=2.1,C=1.2,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train_multiclass)
	sg('new_classifier', 'GMNPSVM')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('GMNPSVM')
	classifier_gmnpsvm(*parameter_list[0])

../examples/documented/python_static/multiclass_multiclasslibsvm.py

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()


traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
train_label=lm.load_labels('../data/label_train_multiclass.dat')
parameter_list=[[traindat,testdat, train_label,10,2.1,10.,1e-5,False],
		[traindat,testdat,train_label,10,2.1,11.,1e-4,False]]

def classifier_libsvm_multiclass (fm_train_real=traindat,fm_test_real=testdat,
			label_train_multiclass=train_label,
			size_cache=10, width=2.1,C=10.,
			epsilon=1e-5,use_bias=False):

	sg('set_features', 'TRAIN', fm_train_real)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train_multiclass)
	sg('new_classifier', 'LIBSVM_MULTICLASS')
	sg('svm_epsilon', epsilon)
	sg('c', C)
	sg('svm_use_bias', use_bias)
	sg('train_classifier')

	sg('set_features', 'TEST', fm_test_real)
	result=sg('classify')
	kernel_matrix = sg('get_kernel_matrix', 'TEST')
	return result, kernel_matrix

if __name__=='__main__':
	print('LibSVMMulticlass')
	classifier_libsvm_multiclass(*parameter_list[0])

Preproc

../examples/documented/python_static/preproc_logplusone.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# LogPlusOne adds one to a dense real-valued vector and takes the logarithm of
# each component of it. It is most useful in situations where the inputs are
# counts: When one compares differences of small counts any difference may matter
# a lot, while small differences in large counts don't. This is what this log
# transformation controls for.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.5,11]]

def preproc_logplusone (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,size_cache=10):

	sg('add_preproc', 'LOGPLUSONE')
	sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

	sg('set_features', 'TRAIN', fm_train_real)
	sg('attach_preproc', 'TRAIN')
	km=sg('get_kernel_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	sg('attach_preproc', 'TEST')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('LogPlusOne')
	preproc_logplusone(*parameter_list[0])

../examples/documented/python_static/preproc_normone.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# NormOne, normalizes vectors to have norm 1.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10],[traindat,testdat,1.5,11]]

def preproc_normone (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,size_cache=10):
	sg('add_preproc', 'NORMONE')
	sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

	sg('set_features', 'TRAIN', fm_train_real)
	sg('attach_preproc', 'TRAIN')
	km=sg('get_kernel_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	sg('attach_preproc', 'TEST')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('NormOne')
	preproc_normone(*parameter_list[0])

../examples/documented/python_static/preproc_prunevarsubmean.py

# In this example a kernel matrix is computed for a given real-valued data set.
# The kernel used is the Chi2 kernel which operates on real-valued vectors. It
# computes the chi-squared distance between sets of histograms. It is a very
# useful distance in image recognition (used to detect objects). The preprocessor
# PruneVarSubMean substracts the mean from each feature and removes features that
# have zero variance.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
parameter_list=[[traindat,testdat,1.4,10,True],[traindat,testdat,1.5,11,True]]

def preproc_prunevarsubmean (fm_train_real=traindat,fm_test_real=testdat,
		 width=1.4,size_cache=10,divide_by_std=True):

	sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std)
	sg('set_kernel', 'CHI2', 'REAL', size_cache, width)

	sg('set_features', 'TRAIN', fm_train_real)
	sg('attach_preproc', 'TRAIN')
	km=sg('get_kernel_matrix', 'TRAIN')

	sg('set_features', 'TEST', fm_test_real)
	sg('attach_preproc', 'TEST')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('PruneVarSubMean')
	preproc_prunevarsubmean(*parameter_list[0])

../examples/documented/python_static/preproc_sortulongstring.py

# In this example a kernel matrix is computed for a given string data set. The
# CommUlongString kernel is used to compute the spectrum kernel from strings that
# have been mapped into unsigned 64bit integers. These 64bit integers correspond
# to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
# This is done using the SortUlongString preprocessor, which sorts the indivual
# strings in ascending order. The kernel function basically uses the algorithm in
# the unix "comm" command (hence the name). Note that this representation enables
# spectrum kernels of order 8 for 8bit alphabets (like binaries) and order 32 for
# 2-bit alphabets like DNA. For this kernel the linadd speedups are implemented
# (though there is room for improvement here when a whole set of sequences is
# ADDed) using sorted lists.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
		[traindna,testdna,11,4,0,'n',False,'FULL']]

def preproc_sortulongstring (fm_train_dna=traindna,fm_test_dna=testdna,
				 size_cache=10,order=3,gap=0,reverse='n',
				 use_sign=False,normalization='FULL'):

	sg('add_preproc', 'SORTULONGSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'COMMSTRING', 'ULONG', size_cache, use_sign, normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('CommUlongString')
	preproc_sortulongstring(*parameter_list[0])

../examples/documented/python_static/preproc_sortwordstring.py

# In this example a kernel matrix is computed for a given string data set. The
# CommWordString kernel is used to compute the spectrum kernel from strings that
# have been mapped into unsigned 16bit integers. These 16bit integers correspond
# to k-mers. To be applicable in this kernel the mapped k-mers have to be sorted.
# This is done using the SortWordString preprocessor, which sorts the indivual
# strings in ascending order. The kernel function basically uses the algorithm in
# the unix "comm" command (hence the name). Note that this representation is
# especially tuned to small alphabets (like the 2-bit alphabet DNA), for which it
# enables spectrum kernels of order up to 8. For this kernel the linadd speedups
# are quite efficiently implemented using direct maps.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindna=lm.load_dna('../data/fm_train_dna.dat')
testdna=lm.load_dna('../data/fm_test_dna.dat')
parameter_list=[[traindna,testdna,10,3,0,'n',False,'FULL'],
		[traindna,testdna,11,4,0,'n',False,'FULL']]

def preproc_sortwordstring (fm_train_dna=traindna,fm_test_dna=testdna,
				 size_cache=10,order=3,gap=0,reverse='n',
				 use_sign=False,normalization='FULL'):

	sg('add_preproc', 'SORTWORDSTRING')
	sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
	sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TRAIN')

	sg('set_features', 'TEST', fm_test_dna, 'DNA')
	sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD', order, order-1, gap, reverse)
	sg('attach_preproc', 'TEST')

	sg('set_kernel', 'COMMSTRING', 'WORD', size_cache, use_sign, normalization)
	km=sg('get_kernel_matrix', 'TRAIN')
	km=sg('get_kernel_matrix', 'TEST')
	return km

if __name__=='__main__':
	print('CommWordString')
	preproc_sortwordstring(*parameter_list[0])

Regression

../examples/documented/python_static/regression_krr.py

# In this example a kernelized version of ridge regression (KRR) is trained on a
# real-valued data set. The KRR is trained with regularization parameter tau=1e-6
# and a gaussian kernel with width=0.8.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
trainlabel=lm.load_labels('../data/label_train_regression.dat')
parameter_list=[[traindat,testdat,trainlabel,10,2.1,1.2,1e-6],
		[traindat,testdat,trainlabel,11,2.3,1.3,1e-6]]

def regression_krr (fm_train=traindat,fm_test=testdat,
		label_train=trainlabel,size_cache=10,width=2.1,
		C=1.2,tau=1e-6):

	sg('set_features', 'TRAIN', fm_train)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train)

	sg('new_regression', 'KERNELRIDGEREGRESSION')
	sg('krr_tau', tau)
	sg('c', C)
	sg('train_regression')

	sg('set_features', 'TEST', fm_test)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('KRR')
	regression_krr(*parameter_list[0])

../examples/documented/python_static/regression_libsvr.py

# In this example a support vector regression algorithm is trained on a
# real-valued toy data set. The underlying library used for the SVR training is
# LIBSVM. The SVR is trained with regularization parameter C=1 and a gaussian
# kernel with width=2.1.
# 
# For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/ .

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
trainlabel=lm.load_labels('../data/label_train_regression.dat')
parameter_list=[[traindat,testdat,trainlabel,10,2.1,1.2,1e-5,1e-2],
		[traindat,testdat,trainlabel,11,2.3,1.3,1e-6,1e-3]]

def regression_libsvr (fm_train=traindat,fm_test=testdat,
		label_train=trainlabel,size_cache=10,width=2.1,
		C=1.2,epsilon=1e-5,tube_epsilon=1e-2):

	sg('set_features', 'TRAIN', fm_train)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train)
	sg('new_regression', 'LIBSVR')
	sg('svr_tube_epsilon', tube_epsilon)
	sg('c', C)
	sg('train_regression')

	sg('set_features', 'TEST', fm_test)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('LibSVR')
	regression_libsvr(*parameter_list[0])

../examples/documented/python_static/regression_svrlight.py

# In this example a support vector regression algorithm is trained on a
# real-valued toy data set. The underlying library used for the SVR training is
# SVM^light. The SVR is trained with regularization parameter C=1 and a gaussian
# kernel with width=2.1.
# 
# For more details on the SVM^light see
#  T. Joachims. Making large-scale SVM learning practical. In Advances in Kernel
#  Methods -- Support Vector Learning, pages 169-184. MIT Press, Cambridge, MA USA, 1999.

from tools.load import LoadMatrix
from sg import sg
lm=LoadMatrix()

traindat=lm.load_numbers('../data/fm_train_real.dat')
testdat=lm.load_numbers('../data/fm_test_real.dat')
trainlabel=lm.load_labels('../data/label_train_twoclass.dat')
parameter_list=[[traindat,testdat,trainlabel,10,2.1,1.2,1e-5,1e-2],
		[traindat,testdat,trainlabel,11,2.3,1.3,1e-6,1e-3]]

def regression_svrlight (fm_train=traindat,fm_test=testdat,
		label_train=trainlabel,size_cache=10,width=2.1,
		C=1.2,epsilon=1e-5,tube_epsilon=1e-2):

	sg('set_features', 'TRAIN', fm_train)
	sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)

	sg('set_labels', 'TRAIN', label_train)

	try:
		sg('new_regression', 'SVRLIGHT')
	except RuntimeError:
		return

	sg('svr_tube_epsilon', tube_epsilon)
	sg('c', C)
	sg('train_regression')

	sg('set_features', 'TEST', fm_test)
	result=sg('classify')
	return result

if __name__=='__main__':
	print('SVRLight')
	regression_svrlight(*parameter_list[0])