|
SHOGUN
v2.0.0
|
This page lists ready-to-run Shogun examples for the Lua Modular interface.
To run an example, issue
lua name_of_example.lua
- In this example the Averaged Perceptron is used to classify toy data.
require 'modshogun'
require 'load'
-- Load the training/test feature data and the training labels from the data directory.
traindat = load_numbers("../data/fm_train_real.dat")
testdat = load_numbers("../data/fm_test_real.dat")
label_traindat = load_labels("../data/label_train_twoclass.dat")

-- One argument tuple per run of classifier_averaged_perceptron_modular:
-- train data, test data, train labels, learn rate, max iterations, thread count.
parameter_list = {
  {traindat, testdat, label_traindat, 1., 1000, 1},
  {traindat, testdat, label_traindat, 1., 100, 1},
}
--- Train an averaged perceptron on the training data and label the test data.
-- Every intermediate object is a local, so calling this does not create globals.
-- @param fm_train_real training data, wrapped in modshogun.RealFeatures
-- @param fm_test_real test data, wrapped in modshogun.RealFeatures
-- @param label_train_twoclass training labels, wrapped in modshogun.BinaryLabels
-- @param learn_rate value forwarded to set_learn_rate
-- @param max_iter value forwarded to set_max_iter
-- @param num_threads accepted for signature compatibility; not used by this function
-- @return the trained perceptron, and the labels it produced for the test features
function classifier_averaged_perceptron_modular(fm_train_real, fm_test_real, label_train_twoclass, learn_rate, max_iter, num_threads)
  local feats_train = modshogun.RealFeatures(fm_train_real)
  local feats_test = modshogun.RealFeatures(fm_test_real)
  local labels = modshogun.BinaryLabels(label_train_twoclass)

  local perceptron = modshogun.AveragedPerceptron(feats_train, labels)
  perceptron:set_learn_rate(learn_rate)
  perceptron:set_max_iter(max_iter)
  perceptron:train()

  -- Switch the machine to the test features before applying it.
  perceptron:set_features(feats_test)
  local out_labels = perceptron:apply():get_labels()
  return perceptron, out_labels
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'AveragedPerceptron'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  classifier_averaged_perceptron_modular(unpack_args(parameter_list[1]))
end
- In this example a two-class support vector machine classifier is trained on a
- 2-dimensional randomly generated data set and the trained classifier is used to
- predict labels of test examples. As training algorithm the LIBSVM solver is used
- with SVM regularization parameter C=1 and a Gaussian kernel of width 2.1.
-
- For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/
require 'modshogun'
require 'load'
--- Append the columns of every further matrix to the rows of the first one.
-- The first argument is modified in place and is also the return value.
-- @param ... matrices given as tables of row tables
-- @return the first argument, with each row extended by the matching rows of the others
function concatenate(...)
  local merged = (...)
  local extras = { select(2, ...) }
  for _, block in ipairs(extras) do
    for r, cells in ipairs(block) do
      for _, value in ipairs(cells) do
        table.insert(merged[r], value)
      end
    end
  end
  return merged
end
--- Build a rows x cols table of tables filled with math.random() + dist.
-- Values are drawn row by row, left to right.
-- @param rows number of row tables
-- @param cols number of entries per row
-- @param dist offset added to every random draw
-- @return the generated matrix
function rand_matrix(rows, cols, dist)
  local grid = {}
  for r = 1, rows do
    local line = {}
    for c = 1, cols do
      line[c] = math.random() + dist
    end
    grid[r] = line
  end
  return grid
end
--- Build a sequence of `num` ones.
-- The result table is local, so no global is created as a side effect.
-- @param num number of entries
-- @return a table {1, 1, ..., 1} with num entries (empty when num < 1)
function ones(num)
  local result = {}
  for i = 1, num do
    result[i] = 1
  end
  return result
end
-- Problem setup: examples per class, class offset, kernel width, SVM constant C.
num = 1000
dist = 1
width = 2.1
C = 1

-- Each data set joins a block drawn around -dist with a block drawn around +dist.
traindata_real = concatenate(rand_matrix(2, num, -dist), rand_matrix(2, num, dist))
testdata_real = concatenate(rand_matrix(2, num, -dist), rand_matrix(2, num, dist))

-- The first num examples are labelled -1, the following num examples +1.
trainlab, testlab = {}, {}
for i = 1, num do
  trainlab[i], trainlab[i + num] = -1, 1
  testlab[i], testlab[i + num] = -1, 1
end

feats_train = modshogun.RealFeatures(traindata_real)
feats_test = modshogun.RealFeatures(testdata_real)
kernel = modshogun.GaussianKernel(feats_train, feats_train, width)
labels = modshogun.BinaryLabels(trainlab)
svm = modshogun.LibSVM(C, kernel, labels)
svm:train()

-- Re-initialise the kernel on (train, test) before predicting.
kernel:init(feats_train, feats_test)
out = svm:apply():get_labels()

-- An error is a positive output in the first half or a negative one in the second half.
err_num = 0
for i = 1, num do
  local misses = 0
  if out[i] > 0 then
    misses = misses + 1
  end
  if out[i + num] < 0 then
    misses = misses + 1
  end
  err_num = err_num + misses
end
testerr = err_num / (2 * num)
print(testerr)
- In this example toy data is being processed using the Isomap algorithm
- as described in
-
- Silva, V. D., & Tenenbaum, J. B. (2003).
- Global versus local methods in nonlinear dimensionality reduction.
- Advances in Neural Information Processing Systems 15, 15(Figure 2), 721-728. MIT Press.
- Retrieved from http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.9.3407&rep=rep1&type=pdf
-
- Before applying to the data the landmark approximation is enabled with
- specified number of landmarks. The landmark approximation is described in
-
- Sparse multidimensional scaling using landmark points
- V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
-
- After enabling the landmark approximation, the k parameter -- the number
- of neighbors in the k-nearest-neighbor graph -- is initialized.
require 'modshogun'
require 'load'
-- Input data for the Isomap example.
data = load_numbers("../data/fm_train_real.dat")
-- One argument tuple per run of converter_isomap_modular.
parameter_list = { {data} }
--- Apply an Isomap converter with target dimension 1 to the given data.
-- Only set_target_dim is configured here; no landmark or neighbourhood-size
-- setter is called. The value returned by converter:apply is discarded and the
-- input feature object is returned.
-- Intermediate objects are locals, so no globals are created.
-- @param data data wrapped in modshogun.RealFeatures
-- @return the RealFeatures object built from `data`
function converter_isomap_modular(data)
  local features = modshogun.RealFeatures(data)
  local converter = modshogun.Isomap()
  converter:set_target_dim(1)
  converter:apply(features)
  return features
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'Isomap'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  converter_isomap_modular(unpack_args(parameter_list[1]))
end
- An approach as applied below, which shows the processing of input data
- from a file becomes a crucial factor for writing your own sample applications.
- This approach is just one example of what can be done using the distance
- functions provided by shogun.
-
- First, you need to determine what type your data will be, because this
- will determine the distance function you can use.
-
- This example loads two stored matrices of real values from different
- files and initializes the matrices to 'RealFeatures'.
- Each column of the matrices corresponds to one data point.
-
- The distance initialized by two data sets (the same data set as shown in the
- first call) controls the processing of the given data points, where a pairwise
- distance matrix is computed by 'get_distance_matrix'.
-
- The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
-
- The method call 'init'* binds the given data sets, where a pairwise distance
- matrix between these two data sets is computed by 'get_distance_matrix'.
-
- The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
-
- *Note that the previous computed distance matrix can no longer be
- reaccessed by 'get_distance_matrix'.
-
- For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
-
- Obviously, using the Bray Curtis distance is not limited to this showcase
- example.
require 'modshogun'
require 'load'
-- Train and test data for the Bray-Curtis distance example.
traindat = load_numbers("../data/fm_train_real.dat")
testdat = load_numbers("../data/fm_test_real.dat")
-- One argument tuple (train data, test data) per run of distance_braycurtis_modular.
parameter_list = {
  {traindat, testdat},
  {traindat, testdat},
}
--- Compute Bray-Curtis distance matrices: train vs. train, then train vs. test.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_real training data, wrapped in modshogun.RealFeatures
-- @param fm_test_real test data, wrapped in modshogun.RealFeatures
-- @return the distance object, the train/train matrix and the train/test matrix
function distance_braycurtis_modular(fm_train_real, fm_test_real)
  local feats_train = modshogun.RealFeatures(fm_train_real)
  local feats_test = modshogun.RealFeatures(fm_test_real)

  local distance = modshogun.BrayCurtisDistance(feats_train, feats_train)
  local dm_train = distance:get_distance_matrix()

  -- Rebind the same distance object to the (train, test) pair.
  distance:init(feats_train, feats_test)
  local dm_test = distance:get_distance_matrix()
  return distance, dm_train, dm_test
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'BrayCurtisDistance'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  distance_braycurtis_modular(unpack_args(parameter_list[1]))
end
- In this example various (accuracy, error rate, ..) measures are being computed
- for the pair of ground truth toy data and random data.
require 'modshogun'
require 'load'
-- Ground-truth labels come from the data directory.
ground_truth = load_labels("../data/label_train_twoclass.dat")

-- One pseudo-random score per ground-truth label, drawn with a fixed seed.
-- NOTE(review): math.random() is never negative; confirm how BinaryLabels
-- maps these scores to the two classes.
math.randomseed(17)
predicted = {}
for i = 1, #ground_truth do
  predicted[i] = math.random()
end

-- One argument tuple per run of evaluation_contingencytableevaluation_modular.
parameter_list = { {ground_truth, predicted} }
--- Evaluate predicted labels against ground truth with nine contingency-table measures.
-- A plain ContingencyTableEvaluation is evaluated first and its result discarded.
-- Intermediate objects are locals, so no globals are created.
-- @param ground_truth reference labels, wrapped in modshogun.BinaryLabels
-- @param predicted predicted values, wrapped in modshogun.BinaryLabels
-- @return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
function evaluation_contingencytableevaluation_modular(ground_truth, predicted)
  local ground_truth_labels = modshogun.BinaryLabels(ground_truth)
  local predicted_labels = modshogun.BinaryLabels(predicted)

  local base_evaluator = modshogun.ContingencyTableEvaluation()
  base_evaluator:evaluate(predicted_labels, ground_truth_labels)

  local accuracy = modshogun.AccuracyMeasure():evaluate(predicted_labels, ground_truth_labels)
  local errorrate = modshogun.ErrorRateMeasure():evaluate(predicted_labels, ground_truth_labels)
  local bal = modshogun.BALMeasure():evaluate(predicted_labels, ground_truth_labels)
  local wracc = modshogun.WRACCMeasure():evaluate(predicted_labels, ground_truth_labels)
  local f1 = modshogun.F1Measure():evaluate(predicted_labels, ground_truth_labels)
  local crosscorrelation = modshogun.CrossCorrelationMeasure():evaluate(predicted_labels, ground_truth_labels)
  local recall = modshogun.RecallMeasure():evaluate(predicted_labels, ground_truth_labels)
  local precision = modshogun.PrecisionMeasure():evaluate(predicted_labels, ground_truth_labels)
  local specificity = modshogun.SpecificityMeasure():evaluate(predicted_labels, ground_truth_labels)

  return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'ContingencyTableEvaluation'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  evaluation_contingencytableevaluation_modular(unpack_args(parameter_list[1]))
end
require 'modshogun'
-- Example input: six rows of three values each.
matrix = {
  {1, 2, 3},
  {4, 0, 0},
  {0, 0, 0},
  {0, 5, 0},
  {0, 0, 6},
  {9, 9, 9},
}
-- One argument tuple per run of features_dense_real_modular.
parameter_list = { {matrix} }
--- Wrap a matrix in RealFeatures, overwrite feature vector 0 and read the matrix back.
-- Intermediate objects are locals, so no globals are created.
-- @param A matrix passed to modshogun.RealFeatures
-- @return the feature matrix after vector 0 was set to {1,4,0,0,0,9}
function features_dense_real_modular(A)
  local feats = modshogun.RealFeatures(A)
  feats:set_feature_vector({1, 4, 0, 0, 0, 9}, 0)
  return feats:get_feature_matrix()
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'dense_real'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  features_dense_real_modular(unpack_args(parameter_list[1]))
end
- This example demonstrates how to encode ASCII-strings (255 symbols) in shogun.
require 'modshogun'
require 'load'
-- Example input strings.
strings = {"hey", "guys", "i", "am", "a", "string"}
-- One argument tuple per run of features_string_char_modular.
parameter_list = { {strings} }
--- Print the input strings, wrap them in StringCharFeatures (RAWBYTE) and report sizes.
-- The strings are printed with ipairs so the output order is the sequence order;
-- the feature object is a local, so no global is created.
-- @param strings sequence of strings
-- @return the result of get_features() and the feature object itself
function features_string_char_modular(strings)
  for _, v in ipairs(strings) do
    print(v)
  end
  local f = modshogun.StringCharFeatures(strings, modshogun.RAWBYTE)
  print("max string length " .. f:get_max_vector_length())
  print("number of strings " .. f:get_num_vectors())
  --print ("length of first string" ..f:get_vector_length(0))
  --print ("strings" .. f:get_features())
  --FIXME
  --f:set_feature_vector({"t","e","s","t"}, 0)
  return f:get_features(), f
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'StringCharFeatures'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  features_string_char_modular(unpack_args(parameter_list[1]))
end
- This is an example for the initialization of the CommUlongString-kernel. This kernel
- sums over k-mer matches (k='order'). For efficient computing a preprocessor is used
- that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
- only once.
require 'modshogun'
require 'load'
-- DNA train and test data for the CommUlongString kernel example.
traindat = load_dna("../data/fm_train_dna.dat")
testdat = load_dna("../data/fm_test_dna.dat")
-- Argument tuples for kernel_comm_ulong_string_modular:
-- train data, test data, order, gap, reverse.
parameter_list = {
  {traindat, testdat, 3, 0, false},
  {traindat, testdat, 4, 0, false},
}
--- Compute CommUlongString kernel matrices (train/train and train/test) on DNA strings.
-- Both data sets are converted to StringUlongFeatures and sorted with one shared
-- SortUlongString preprocessor that is initialised on the training features.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_dna training strings
-- @param fm_test_dna test strings
-- @param order passed to obtain_from_char as (order-1, order)
-- @param gap passed to obtain_from_char
-- @param reverse passed to obtain_from_char
-- @return the train kernel matrix, the test kernel matrix and the kernel object
function kernel_comm_ulong_string_modular(fm_train_dna, fm_test_dna, order, gap, reverse)
  -- Convert raw DNA strings to ulong features.
  local function to_ulong_features(fm_dna)
    local charfeat = modshogun.StringCharFeatures(modshogun.DNA)
    charfeat:set_features(fm_dna)
    local feats = modshogun.StringUlongFeatures(charfeat:get_alphabet())
    feats:obtain_from_char(charfeat, order - 1, order, gap, reverse)
    return feats
  end

  local feats_train = to_ulong_features(fm_train_dna)
  local preproc = modshogun.SortUlongString()
  preproc:init(feats_train)
  feats_train:add_preprocessor(preproc)
  feats_train:apply_preprocessor()

  local feats_test = to_ulong_features(fm_test_dna)
  feats_test:add_preprocessor(preproc)
  feats_test:apply_preprocessor()

  -- use_sign is fixed to false in this example.
  local use_sign = false
  local kernel = modshogun.CommUlongStringKernel(feats_train, feats_train, use_sign)
  local km_train = kernel:get_kernel_matrix()
  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return km_train, km_test, kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'CommUlongString'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_comm_ulong_string_modular(unpack_args(parameter_list[1]))
end
- This is an example for the initialization of the CommWordString-kernel (aka
- Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel
- sums over k-mer matches (k='order'). For efficient computing a preprocessor is used
- that extracts and sorts all k-mers. If 'use_sign' is set to one, each k-mer is counted
- only once.
require 'modshogun'
require 'load'
-- DNA train and test data for the CommWordString kernel example.
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')
-- Argument tuples for kernel_comm_word_string_modular:
-- train data, test data, order, gap, reverse, use_sign.
-- The booleans are spelled `false`: an identifier such as `False` is an
-- undefined global in Lua and would evaluate to nil, truncating the tuple.
parameter_list = {
  {traindat, testdat, 4, 0, false, false},
  {traindat, testdat, 4, 0, false, false},
}
--- Compute CommWordString kernel matrices (train/train and train/test) on DNA strings.
-- Both data sets are converted to StringWordFeatures and sorted with one shared
-- SortWordString preprocessor that is initialised on the training features.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_dna training strings
-- @param fm_test_dna test strings
-- @param order passed to obtain_from_char as (order-1, order)
-- @param gap passed to obtain_from_char
-- @param reverse passed to obtain_from_char
-- @param use_sign passed to the CommWordStringKernel constructor
-- @return the train kernel matrix, the test kernel matrix and the kernel object
function kernel_comm_word_string_modular(fm_train_dna, fm_test_dna, order, gap, reverse, use_sign)
  -- Convert raw DNA strings to word features.
  local function to_word_features(fm_dna)
    local charfeat = modshogun.StringCharFeatures(modshogun.DNA)
    charfeat:set_features(fm_dna)
    local feats = modshogun.StringWordFeatures(charfeat:get_alphabet())
    feats:obtain_from_char(charfeat, order - 1, order, gap, reverse)
    return feats
  end

  local feats_train = to_word_features(fm_train_dna)
  local preproc = modshogun.SortWordString()
  preproc:init(feats_train)
  feats_train:add_preprocessor(preproc)
  feats_train:apply_preprocessor()

  local feats_test = to_word_features(fm_test_dna)
  feats_test:add_preprocessor(preproc)
  feats_test:apply_preprocessor()

  local kernel = modshogun.CommWordStringKernel(feats_train, feats_train, use_sign)
  local km_train = kernel:get_kernel_matrix()
  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return km_train, km_test, kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'CommWordString'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_comm_word_string_modular(unpack_args(parameter_list[1]))
end
- The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.
require 'modshogun'
require 'load'
-- Train and test data for the Gaussian kernel example.
traindat = load_numbers("../data/fm_train_real.dat")
testdat = load_numbers("../data/fm_test_real.dat")
-- Argument tuples for kernel_gaussian_modular: train data, test data, kernel width.
parameter_list = {
  {traindat, testdat, 1.3},
  {traindat, testdat, 1.4},
}
--- Compute Gaussian kernel matrices: train vs. train, then train vs. test.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_real training data, wrapped in modshogun.RealFeatures
-- @param fm_test_real test data, wrapped in modshogun.RealFeatures
-- @param width width passed to the GaussianKernel constructor
-- @return the train kernel matrix, the test kernel matrix and the kernel object
function kernel_gaussian_modular(fm_train_real, fm_test_real, width)
  local feats_train = modshogun.RealFeatures(fm_train_real)
  local feats_test = modshogun.RealFeatures(fm_test_real)

  local kernel = modshogun.GaussianKernel(feats_train, feats_train, width)
  local km_train = kernel:get_kernel_matrix()

  -- Rebind the same kernel object to the (train, test) pair.
  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return km_train, km_test, kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'Gaussian'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_gaussian_modular(unpack_args(parameter_list[1]))
end
- This is an example for the initialization of a linear kernel on raw byte
- data.
require 'modshogun'
require 'load'
-- Train and test data for the linear kernel on byte features.
traindat = load_numbers("../data/fm_train_byte.dat")
testdat = load_numbers("../data/fm_test_byte.dat")
-- Argument tuples (train data, test data) for kernel_linear_byte_modular.
parameter_list = {
  {traindat, testdat},
  {traindat, testdat},
}
--- Build a linear kernel on byte features and compute its train and test matrices.
-- Both kernel matrices are computed but only the kernel object is returned.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_byte training data, wrapped in modshogun.ByteFeatures
-- @param fm_test_byte test data, wrapped in modshogun.ByteFeatures
-- @return the kernel object, last initialised on (train, test)
function kernel_linear_byte_modular(fm_train_byte, fm_test_byte)
  local feats_train = modshogun.ByteFeatures(fm_train_byte)
  local feats_test = modshogun.ByteFeatures(fm_test_byte)

  local kernel = modshogun.LinearKernel(feats_train, feats_train)
  local km_train = kernel:get_kernel_matrix()

  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'LinearByte'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_linear_byte_modular(unpack_args(parameter_list[1]))
end
- This is an example for the initialization of a linear kernel on word (2byte)
- data.
require 'modshogun'
require 'load'
-- Train and test data for the linear kernel on word features.
traindat = load_numbers("../data/fm_train_word.dat")
testdat = load_numbers("../data/fm_test_word.dat")
-- Argument tuples for kernel_linear_word_modular: train data, test data, scale.
parameter_list = {
  {traindat, testdat, 1.2},
  {traindat, testdat, 1.2},
}
--- Build a linear kernel on word features with an AvgDiagKernelNormalizer.
-- The kernel is re-initialised on (train, train) after the normalizer is set,
-- then on (train, test). Both matrices are computed but only the kernel is returned.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_word training data, wrapped in modshogun.WordFeatures
-- @param fm_test_word test data, wrapped in modshogun.WordFeatures
-- @param scale argument of the AvgDiagKernelNormalizer constructor
-- @return the kernel object, last initialised on (train, test)
function kernel_linear_word_modular(fm_train_word, fm_test_word, scale)
  local feats_train = modshogun.WordFeatures(fm_train_word)
  local feats_test = modshogun.WordFeatures(fm_test_word)

  local kernel = modshogun.LinearKernel(feats_train, feats_train)
  kernel:set_normalizer(modshogun.AvgDiagKernelNormalizer(scale))
  kernel:init(feats_train, feats_train)
  local km_train = kernel:get_kernel_matrix()

  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'LinearWord'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_linear_word_modular(unpack_args(parameter_list[1]))
end
- This example shows how to create a Weighted Degree String Kernel from data
- and how to compute the kernel matrix from the resulting object.
require 'modshogun'
require 'load'
-- DNA train and test data for the Weighted Degree String kernel example.
traindat = load_dna("../data/fm_train_dna.dat")
testdat = load_dna("../data/fm_test_dna.dat")
-- Argument tuples for kernel_weighted_degree_string_modular: train data, test data, degree.
parameter_list = {
  {traindat, testdat, 3},
  {traindat, testdat, 20},
}
--- Compute Weighted Degree String kernel matrices: train vs. train, then train vs. test.
-- The weights are 2*i/((degree+1)*degree) for i = degree down to 1, which sum to 1.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train_dna training strings, wrapped in StringCharFeatures (DNA)
-- @param fm_test_dna test strings, wrapped in StringCharFeatures (DNA)
-- @param degree kernel degree; also the number of weights generated
-- @return the train kernel matrix, the test kernel matrix and the kernel object
function kernel_weighted_degree_string_modular(fm_train_dna, fm_test_dna, degree)
  local feats_train = modshogun.StringCharFeatures(fm_train_dna, modshogun.DNA)
  local feats_test = modshogun.StringCharFeatures(fm_test_dna, modshogun.DNA)

  local kernel = modshogun.WeightedDegreeStringKernel(feats_train, feats_train, degree)

  local weights = {}
  for i = degree, 1, -1 do
    weights[#weights + 1] = 2 * i / ((degree + 1) * degree)
  end
  kernel:set_wd_weights(weights)

  local km_train = kernel:get_kernel_matrix()
  kernel:init(feats_train, feats_test)
  local km_test = kernel:get_kernel_matrix()
  return km_train, km_test, kernel
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'WeightedDegreeString'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  kernel_weighted_degree_string_modular(unpack_args(parameter_list[1]))
end
- In this example a kernelized version of ridge regression (KRR) is trained on a
- real-valued data set. The KRR is trained with regularization parameter tau=1e-6
- and a Gaussian kernel with width=0.8. The labels of both the train and the test
- data can be fetched via krr:apply():get_labels().
require 'modshogun'
require 'load'
-- Train/test data and training labels for the kernel ridge regression example.
traindat = load_numbers("../data/fm_train_real.dat")
testdat = load_numbers("../data/fm_test_real.dat")
label_traindat = load_labels("../data/label_train_twoclass.dat")
-- Argument tuples for regression_krr_modular:
-- train data, test data, train labels, kernel width, tau.
parameter_list = {
  {traindat, testdat, label_traindat, 0.8, 1e-6},
  {traindat, testdat, label_traindat, 0.9, 1e-7},
}
--- Train kernel ridge regression with a Gaussian kernel and predict on the test data.
-- Intermediate objects are locals, so no globals are created.
-- @param fm_train training data, wrapped in modshogun.RealFeatures
-- @param fm_test test data, wrapped in modshogun.RealFeatures
-- @param label_train training labels, wrapped in modshogun.RegressionLabels
-- @param width width passed to the GaussianKernel constructor
-- @param tau first argument of the KernelRidgeRegression constructor
-- @return the predicted labels, the kernel object and the trained machine
function regression_krr_modular(fm_train, fm_test, label_train, width, tau)
  local feats_train = modshogun.RealFeatures(fm_train)
  local feats_test = modshogun.RealFeatures(fm_test)
  local kernel = modshogun.GaussianKernel(feats_train, feats_train, width)
  local labels = modshogun.RegressionLabels(label_train)

  local krr = modshogun.KernelRidgeRegression(tau, kernel, labels)
  krr:train(feats_train)

  -- Rebind the kernel to the (train, test) pair before predicting.
  kernel:init(feats_train, feats_test)
  local out = krr:apply():get_labels()
  return out, kernel, krr
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
if debug.getinfo(3) == nil then
  print 'KernelRidgeRegression'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  regression_krr_modular(unpack_args(parameter_list[1]))
end
- In this example serialization of SVM (Support Vector Machine) is shown
require 'os'
require 'modshogun'
require 'load'
-- Argument tuples for serialization_complex_example: num, dist, dim, C, width.
parameter_list = {
  {5, 1, 10, 2.0, 10},
  {10, 0.3, 2, 1.0, 0.1},
}
--- Raise an error unless a serialization call reported success.
-- @param status result of a save/load call; anything other than exactly `true` fails
function check_status(status)
  assert(status == true,
    "serialization failed: expected status true, got " .. tostring(status))
end
--- Append the columns of every further matrix to the rows of the first one.
-- The first argument is modified in place and is also the return value.
-- @param ... matrices given as tables of row tables
-- @return the first argument, with each row extended by the matching rows of the others
function concatenate(...)
  local merged = (...)
  local extras = { select(2, ...) }
  for _, block in ipairs(extras) do
    for r, cells in ipairs(block) do
      for _, value in ipairs(cells) do
        table.insert(merged[r], value)
      end
    end
  end
  return merged
end
--- Build a rows x cols table of tables filled with math.random() + dist.
-- Values are drawn row by row, left to right.
-- @param rows number of row tables
-- @param cols number of entries per row
-- @param dist offset added to every random draw
-- @return the generated matrix
function rand_matrix(rows, cols, dist)
  local grid = {}
  for r = 1, rows do
    local line = {}
    for c = 1, cols do
      line[c] = math.random() + dist
    end
    grid[r] = line
  end
  return grid
end
--- Build a label sequence of consecutive class blocks: num zeros, num ones, and so on.
-- The result table is local, so no global is created as a side effect.
-- @param num number of labels per class
-- @param num_classes optional number of classes; defaults to 4 (labels 0..3)
-- @return a sequence of num * num_classes labels
function generate_lab(num, num_classes)
  num_classes = num_classes or 4
  local labels = {}
  for class = 0, num_classes - 1 do
    local offset = class * num
    for i = 1, num do
      labels[offset + i] = class
    end
  end
  return labels
end
--- Train a GMNPSVM on four shifted random blocks and round-trip it through four
-- serialization formats (HDF5, ASCII, JSON, XML).
-- For each format the machine is saved; afterwards, for each format, a fresh
-- GMNPSVM is loaded from the file and trained. The temporary files are removed
-- once at the end. Intermediate objects are locals, so no globals are created.
-- @param num number of examples per class
-- @param dist offset between consecutive class blocks
-- @param dim number of rows of each random block
-- @param C first argument of the GMNPSVM constructor
-- @param width width passed to the GaussianKernel constructor
-- @return the originally trained machine and the machine loaded last (from XML)
function serialization_complex_example(num, dist, dim, C, width)
  math.randomseed(17)

  local data = concatenate(
    rand_matrix(dim, num, 0),
    rand_matrix(dim, num, dist),
    rand_matrix(dim, num, 2 * dist),
    rand_matrix(dim, num, 3 * dist))
  local lab = generate_lab(num)

  local feats = modshogun.RealFeatures(data)
  local kernel = modshogun.GaussianKernel(feats, feats, width)
  local labels = modshogun.MulticlassLabels(lab)
  local svm = modshogun.GMNPSVM(C, kernel, labels)

  feats:add_preprocessor(modshogun.NormOne())
  feats:add_preprocessor(modshogun.LogPlusOne())
  feats:set_preprocessed(1)
  svm:train(feats)

  -- File class and temporary path for every serialization format, in the
  -- order in which they are written and read back.
  local formats = {
    {open = modshogun.SerializableHdf5File, path = "blaah.h5"},
    {open = modshogun.SerializableAsciiFile, path = "blaah.asc"},
    {open = modshogun.SerializableJsonFile, path = "blaah.json"},
    {open = modshogun.SerializableXmlFile, path = "blaah.xml"},
  }

  for _, format in ipairs(formats) do
    local fstream = format.open(format.path, "w")
    check_status(svm:save_serializable(fstream))
  end

  local new_svm
  for _, format in ipairs(formats) do
    local fstream = format.open(format.path, "r")
    new_svm = modshogun.GMNPSVM()
    check_status(new_svm:load_serializable(fstream))
    new_svm:train()
  end

  -- Each temporary file is removed exactly once.
  for _, format in ipairs(formats) do
    os.remove(format.path)
  end

  return svm, new_svm
end
-- Run the example only when no stack level 3 exists (debug.getinfo(3) is nil);
-- presumably this distinguishes direct execution from being loaded by another script.
-- NOTE(review): the printed label mentions SVMLight although the example uses GMNPSVM.
if debug.getinfo(3) == nil then
  print 'Serialization SVMLight'
  -- table.unpack (Lua 5.2+) with a fallback to the global unpack (Lua 5.1).
  local unpack_args = table.unpack or unpack
  serialization_complex_example(unpack_args(parameter_list[1]))
end