SHOGUN 4.2.0
This page lists ready-to-run Shogun examples for the Lua Modular interface.
To run an example, issue
lua name_of_example.lua
-- In this example the Averaged Perceptron is used to classify toy data.
require 'modshogun'
require 'load'
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
label_traindat = load_labels('../data/label_train_twoclass.dat')
parameter_list = {{traindat,testdat,label_traindat,1.,1000,1},{traindat,testdat,label_traindat,1.,100,1}}
function classifier_averaged_perceptron_modular (fm_train_real,fm_test_real,label_train_twoclass,learn_rate,max_iter,num_threads)
feats_train=modshogun.RealFeatures(fm_train_real)
feats_test=modshogun.RealFeatures(fm_test_real)
labels=modshogun.BinaryLabels(label_train_twoclass)
perceptron=modshogun.AveragedPerceptron(feats_train, labels)
perceptron:set_learn_rate(learn_rate)
perceptron:set_max_iter(max_iter)
perceptron:train()
perceptron:set_features(feats_test)
out_labels = perceptron:apply():get_labels()
return perceptron, out_labels
end
if debug.getinfo(3) == nil then
print 'AveragedPerceptron'
classifier_averaged_perceptron_modular(unpack(parameter_list[1]))
end
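-- A minimal, hedged sketch (not part of the shipped example): the evaluation API
-- demonstrated further down this page can also score the trained perceptron on its
-- own training data. Variable names reuse those defined above.
-- perceptron = classifier_averaged_perceptron_modular(unpack(parameter_list[1]))
-- perceptron:set_features(modshogun.RealFeatures(traindat))
-- train_predictions = perceptron:apply()
-- accuracy = modshogun.AccuracyMeasure():evaluate(train_predictions, modshogun.BinaryLabels(label_traindat))
-- print('training accuracy: ' .. accuracy)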
-- The approach applied below, processing input data loaded from a file, is a
-- pattern that becomes crucial when writing your own applications. It is just
-- one example of what can be done with the distance functions provided by Shogun.
--
-- First, you need to determine what type your data will be, because this
-- will determine the distance function you can use.
--
-- This example loads two stored matrices of real values from different
-- files and initializes the matrices to 'RealFeatures'.
-- Each column of the matrices corresponds to one data point.
--
-- The distance object is constructed from two data sets (in the first call the
-- training set is used twice), and the pairwise distance matrix between them is
-- computed by 'get_distance_matrix'.
--
-- The method call 'init'* rebinds the distance object to a new pair of data sets;
-- afterwards 'get_distance_matrix' returns the pairwise distance matrix between
-- these two sets.
--
-- *Note that the previously computed distance matrix can no longer be
-- retrieved via 'get_distance_matrix' once 'init' has been called.
--
-- For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
--
-- Obviously, using the Bray Curtis distance is not limited to this showcase
-- example.
require 'modshogun'
require 'load'
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
parameter_list = {{traindat,testdat},{traindat,testdat}}
function distance_braycurtis_modular (fm_train_real,fm_test_real)
feats_train=modshogun.RealFeatures(fm_train_real)
feats_test=modshogun.RealFeatures(fm_test_real)
distance=modshogun.BrayCurtisDistance(feats_train, feats_train)
dm_train=distance:get_distance_matrix()
distance:init(feats_train, feats_test)
dm_test=distance:get_distance_matrix()
return distance,dm_train,dm_test
end
if debug.getinfo(3) == nil then
print 'BrayCurtisDistance'
distance_braycurtis_modular(unpack(parameter_list[1]))
end
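-- A hedged sketch: the same pattern applies to other distance classes. Assuming,
-- for example, that EuclideanDistance is exposed in your modshogun build, only the
-- constructor changes:
-- feats_train = modshogun.RealFeatures(traindat)
-- feats_test = modshogun.RealFeatures(testdat)
-- distance = modshogun.EuclideanDistance(feats_train, feats_train)
-- dm_train = distance:get_distance_matrix()
-- distance:init(feats_train, feats_test)
-- dm_test = distance:get_distance_matrix()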
-- In this example various measures (accuracy, error rate, ...) are computed
-- for a pair of ground-truth toy labels and random predictions.
require 'modshogun'
require 'load'
ground_truth = load_labels('../data/label_train_twoclass.dat')
math.randomseed(17)
predicted = {}
for i = 1, #ground_truth do
table.insert(predicted, math.random())
end
parameter_list = {{ground_truth,predicted}}
function evaluation_contingencytableevaluation_modular(ground_truth, predicted)
ground_truth_labels = modshogun.BinaryLabels(ground_truth)
predicted_labels = modshogun.BinaryLabels(predicted)
base_evaluator = modshogun.ContingencyTableEvaluation()
base_evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.AccuracyMeasure()
accuracy = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.ErrorRateMeasure()
errorrate = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.BALMeasure()
bal = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.WRACCMeasure()
wracc = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.F1Measure()
f1 = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.CrossCorrelationMeasure()
crosscorrelation = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.RecallMeasure()
recall = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.PrecisionMeasure()
precision = evaluator:evaluate(predicted_labels,ground_truth_labels)
evaluator = modshogun.SpecificityMeasure()
specificity = evaluator:evaluate(predicted_labels,ground_truth_labels)
return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
end
if debug.getinfo(3) == nil then
print 'ContingencyTableEvaluation'
evaluation_contingencytableevaluation_modular(unpack(parameter_list[1]))
end
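-- This example creates dense real-valued features from a plain Lua matrix,
-- replaces one feature vector via 'set_feature_vector' and then retrieves the
-- full feature matrix with 'get_feature_matrix'.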
require 'modshogun'
matrix = {{1,2,3},{4,0,0},{0,0,0},{0,5,0},{0,0,6},{9,9,9}}
parameter_list = {{matrix}}
function features_dense_real_modular(A)
a=modshogun.RealFeatures(A)
a:set_feature_vector({1,4,0,0,0,9}, 0)
a_out = a:get_feature_matrix()
return a_out
end
if debug.getinfo(3) == nil then
print 'dense_real'
features_dense_real_modular(unpack(parameter_list[1]))
end
-- This example demonstrates how to encode ASCII strings (255 symbols) in Shogun.
require 'modshogun'
require 'load'
strings = {'hey','guys','i','am','a','string'}
parameter_list={{strings}}
function features_string_char_modular(strings)
--for k, v in pairs(strings) do print(v) end
--FIXME
--f=modshogun.StringCharFeatures(strings, modshogun.RAWBYTE)
--print("max string length " ..f:get_max_vector_length())
--print("number of strings " .. f:get_num_vectors())
--print ("length of first string" ..f:get_vector_length(0))
--print ("strings" .. f:get_features())
--FIXME
--f:set_feature_vector({"t","e","s","t"}, 0)
--return f:get_features(), f
end
if debug.getinfo(3) == nil then
print 'StringCharFeatures'
features_string_char_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of the CommUlongString kernel. This kernel
-- sums over k-mer matches (k='order'). For efficient computation a preprocessor is used
-- that extracts and sorts all k-mers. If 'use_sign' is set, each k-mer is counted
-- only once.
require 'modshogun'
require 'load'
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')
parameter_list = {{traindat,testdat,3,0,false},{traindat,testdat,4,0,false}}
function kernel_comm_ulong_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse)
--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
--charfeat:set_features(fm_train_dna)
--feats_train=modshogun.StringUlongFeatures(charfeat:get_alphabet())
--feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)
--preproc=modshogun.SortUlongString()
--preproc:init(feats_train)
--feats_train:add_preprocessor(preproc)
--feats_train:apply_preprocessor()
--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
--charfeat:set_features(fm_test_dna)
--feats_test=modshogun.StringUlongFeatures(charfeat:get_alphabet())
--feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
--feats_test:add_preprocessor(preproc)
--feats_test:apply_preprocessor()
--use_sign=false
--kernel=modshogun.CommUlongStringKernel(feats_train, feats_train, use_sign)
--km_train=kernel:get_kernel_matrix()
--kernel:init(feats_train, feats_test)
--km_test=kernel:get_kernel_matrix()
--return km_train,km_test,kernel
end
if debug.getinfo(3) == nil then
print 'CommUlongString'
kernel_comm_ulong_string_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of the CommWordString kernel (aka
-- Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel
-- sums over k-mer matches (k='order'). For efficient computation a preprocessor is used
-- that extracts and sorts all k-mers. If 'use_sign' is set, each k-mer is counted
-- only once.
require 'modshogun'
require 'load'
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')
parameter_list = {{traindat,testdat,4,0,false,false},{traindat,testdat,4,0,false,false}}
function kernel_comm_word_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse, use_sign)
--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
--charfeat:set_features(fm_train_dna)
--feats_train=modshogun.StringWordFeatures(charfeat:get_alphabet())
--feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)
--
--preproc=modshogun.SortWordString()
--preproc:init(feats_train)
--feats_train:add_preprocessor(preproc)
--feats_train:apply_preprocessor()
--
--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
--charfeat:set_features(fm_test_dna)
--feats_test=modshogun.StringWordFeatures(charfeat:get_alphabet())
--feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
--feats_test:add_preprocessor(preproc)
--feats_test:apply_preprocessor()
--
--kernel=modshogun.CommWordStringKernel(feats_train, feats_train, use_sign)
--
--km_train=kernel:get_kernel_matrix()
--kernel:init(feats_train, feats_test)
--km_test=kernel:get_kernel_matrix()
--return km_train,km_test,kernel
end
if debug.getinfo(3) == nil then
print 'CommWordString'
kernel_comm_word_string_modular(unpack(parameter_list[1]))
end
-- The well-known Gaussian kernel (the Swiss army knife for SVMs) on dense real-valued features.
require 'modshogun'
require 'load'
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
parameter_list = {{traindat,testdat, 1.3},{traindat,testdat, 1.4}}
function kernel_gaussian_modular (fm_train_real,fm_test_real,width)
feats_train=modshogun.RealFeatures(fm_train_real)
feats_test=modshogun.RealFeatures(fm_test_real)
kernel=modshogun.GaussianKernel(feats_train, feats_train, width)
km_train=kernel:get_kernel_matrix()
kernel:init(feats_train, feats_test)
km_test=kernel:get_kernel_matrix()
return km_train,km_test,kernel
end
if debug.getinfo(3) == nil then
print 'Gaussian'
kernel_gaussian_modular(unpack(parameter_list[1]))
end
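-- A hedged sketch (not part of the shipped example): a Gaussian kernel is typically
-- handed to an SVM. Assuming LibSVM is exposed in your modshogun build and reusing
-- the data loaded above:
-- labels = modshogun.BinaryLabels(load_labels('../data/label_train_twoclass.dat'))
-- feats_train = modshogun.RealFeatures(traindat)
-- kernel = modshogun.GaussianKernel(feats_train, feats_train, 1.3)
-- svm = modshogun.LibSVM(1.0, kernel, labels)
-- svm:train()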
-- This is an example for the initialization of a linear kernel on raw byte
-- data.
require 'modshogun'
require 'load'
traindat = load_numbers('../data/fm_train_byte.dat')
testdat = load_numbers('../data/fm_test_byte.dat')
parameter_list={{traindat,testdat},{traindat,testdat}}
function kernel_linear_byte_modular(fm_train_byte,fm_test_byte)
feats_train=modshogun.ByteFeatures(fm_train_byte)
feats_test=modshogun.ByteFeatures(fm_test_byte)
kernel=modshogun.LinearKernel(feats_train, feats_train)
km_train=kernel:get_kernel_matrix()
kernel:init(feats_train, feats_test)
km_test=kernel:get_kernel_matrix()
return kernel
end
if debug.getinfo(3) == nil then
print 'LinearByte'
kernel_linear_byte_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of a linear kernel on word (2byte)
-- data.
require 'modshogun'
require 'load'
traindat = load_numbers('../data/fm_train_word.dat')
testdat = load_numbers('../data/fm_test_word.dat')
parameter_list={{traindat,testdat,1.2},{traindat,testdat,1.2}}
function kernel_linear_word_modular (fm_train_word,fm_test_word,scale)
feats_train=modshogun.WordFeatures(fm_train_word)
feats_test=modshogun.WordFeatures(fm_test_word)
kernel=modshogun.LinearKernel(feats_train, feats_train)
kernel:set_normalizer(modshogun.AvgDiagKernelNormalizer(scale))
kernel:init(feats_train, feats_train)
km_train=kernel:get_kernel_matrix()
kernel:init(feats_train, feats_test)
km_test=kernel:get_kernel_matrix()
return kernel
end
if debug.getinfo(3) == nil then
print 'LinearWord'
kernel_linear_word_modular(unpack(parameter_list[1]))
end
-- This example shows how to create a Weighted Degree String Kernel from data
-- and how to compute the kernel matrix from the resulting object.
require 'modshogun'
require 'load'
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')
parameter_list = {{traindat,testdat,3},{traindat,testdat,20}}
function kernel_weighted_degree_string_modular (fm_train_dna,fm_test_dna,degree)
--feats_train=modshogun.StringCharFeatures(fm_train_dna, modshogun.DNA)
--feats_test=modshogun.StringCharFeatures(fm_test_dna, modshogun.DNA)
--
--kernel=modshogun.WeightedDegreeStringKernel(feats_train, feats_train, degree)
--
--weights = {}
--for i = degree, 1, -1 do
--table.insert(weights, 2*i/((degree+1)*degree))
--end
--kernel:set_wd_weights(weights)
--
--km_train=kernel:get_kernel_matrix()
--kernel:init(feats_train, feats_test)
--km_test=kernel:get_kernel_matrix()
--
--return km_train, km_test, kernel
end
if debug.getinfo(3) == nil then
print 'WeightedDegreeString'
kernel_weighted_degree_string_modular(unpack(parameter_list[1]))
end
-- In this example, serialization of an SVM (Support Vector Machine) is shown.
require 'os'
require 'modshogun'
require 'load'
parameter_list={{5,1,10, 2.0, 10}, {10,0.3,2, 1.0, 0.1}}
function check_status(status)
-- abort if reading or writing the serialized file failed
assert(status == true, "ERROR reading/writing serialized file")
end
-- Column-wise concatenation: append the columns of each subsequent matrix to the
-- first one (each column is one data point).
function concatenate(...)
local result = ...
for _,t in ipairs{select(2, ...)} do
for row,rowdata in ipairs(t) do
for col,coldata in ipairs(rowdata) do
table.insert(result[row], coldata)
end
end
end
return result
end
-- Generate a rows x cols matrix of uniform random values shifted by 'dist'.
function rand_matrix(rows, cols, dist)
local matrix = {}
for i = 1, rows do
matrix[i] = {}
for j = 1, cols do
matrix[i][j] = math.random() + dist
end
end
return matrix
end
-- Generate 4*num labels: num examples each for the classes 0, 1, 2 and 3.
function generate_lab(num)
lab={}
for i=1,num do
lab[i]=0
end
for i=num+1,2*num do
lab[i]=1
end
for i=2*num+1,3*num do
lab[i]=2
end
for i=3*num+1,4*num do
lab[i]=3
end
return lab
end
function serialization_complex_example(num, dist, dim, C, width)
math.randomseed(17)
data=concatenate(rand_matrix(dim, num, 0), rand_matrix(dim, num, dist), rand_matrix(dim, num, 2 * dist), rand_matrix(dim, num, 3 * dist))
lab=generate_lab(num)
feats=modshogun.RealFeatures(data)
kernel=modshogun.GaussianKernel(feats, feats, width)
labels=modshogun.MulticlassLabels(lab)
svm = modshogun.GMNPSVM(C, kernel, labels)
feats:add_preprocessor(modshogun.NormOne())
feats:add_preprocessor(modshogun.LogPlusOne())
feats:set_preprocessed(1)
svm:train(feats)
fstream = modshogun.SerializableHdf5File("blaah.h5", "w")
status = svm:save_serializable(fstream)
check_status(status)
fstream = modshogun.SerializableAsciiFile("blaah.asc", "w")
status = svm:save_serializable(fstream)
check_status(status)
-- fstream = modshogun.SerializableJsonFile("blaah.json", "w")
-- status = svm:save_serializable(fstream)
-- check_status(status)
fstream = modshogun.SerializableXmlFile("blaah.xml", "w")
status = svm:save_serializable(fstream)
check_status(status)
fstream = modshogun.SerializableHdf5File("blaah.h5", "r")
new_svm=modshogun.GMNPSVM()
status = new_svm:load_serializable(fstream)
check_status(status)
new_svm:train()
fstream = modshogun.SerializableAsciiFile("blaah.asc", "r")
new_svm=modshogun.GMNPSVM()
status = new_svm:load_serializable(fstream)
check_status(status)
new_svm:train()
-- fstream = modshogun.SerializableJsonFile("blaah.json", "r")
-- new_svm=modshogun.GMNPSVM()
-- status = new_svm:load_serializable(fstream)
-- check_status(status)
-- new_svm:train()
fstream = modshogun.SerializableXmlFile("blaah.xml", "r")
new_svm=modshogun.GMNPSVM()
status = new_svm:load_serializable(fstream)
check_status(status)
new_svm:train()
os.remove("blaah.h5")
os.remove("blaah.asc")
-- os.remove("blaah.json")
os.remove("blaah.xml")
return svm,new_svm
end
if debug.getinfo(3) == nil then
print 'Serialization GMNPSVM'
serialization_complex_example(unpack(parameter_list[1]))
end
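-- A minimal, hedged usage sketch (not part of the shipped example): the original
-- and the deserialized machine can be applied to the same features and their
-- outputs compared. 'probe' is an illustrative name; its dimension matches
-- parameter_list[1] (dim=10).
-- svm, new_svm = serialization_complex_example(unpack(parameter_list[1]))
-- probe = modshogun.RealFeatures(rand_matrix(10, 5, 0))
-- out_old = svm:apply(probe):get_labels()
-- out_new = new_svm:apply(probe):get_labels()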