SHOGUN: Examples for Lua Modular Interface

This page lists ready-to-run Shogun examples for the Lua Modular interface.

To run an example, issue

lua name_of_example.lua

Classifier

../examples/documented/lua_modular/classifier_averaged_perceptron_modular.lua

-  In this example the Averaged Perceptron is used to classify toy data.

require 'modshogun'
require 'load'

-- Toy data files, read through the helpers provided by the 'load' module.
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
label_traindat = load_labels('../data/label_train_twoclass.dat')

-- One entry per run, in the argument order of
-- classifier_averaged_perceptron_modular:
-- {fm_train_real, fm_test_real, label_train_twoclass, learn_rate, max_iter, num_threads}
parameter_list = {
	{traindat, testdat, label_traindat, 1., 1000, 1},
	{traindat, testdat, label_traindat, 1., 100, 1},
}

-- Train an AveragedPerceptron on the training data and apply it to the test data.
--   fm_train_real, fm_test_real: feature matrices handed to RealFeatures
--   label_train_twoclass: training labels handed to Labels
--   learn_rate, max_iter: forwarded to set_learn_rate / set_max_iter
--   num_threads: accepted for signature compatibility; not used in this function
-- Returns the trained perceptron and the labels it produced for the test features.
function classifier_averaged_perceptron_modular (fm_train_real,fm_test_real,label_train_twoclass,learn_rate,max_iter,num_threads)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.RealFeatures(fm_train_real)
	local feats_test = modshogun.RealFeatures(fm_test_real)
	local labels = modshogun.Labels(label_train_twoclass)

	local perceptron = modshogun.AveragedPerceptron(feats_train, labels)
	perceptron:set_learn_rate(learn_rate)
	perceptron:set_max_iter(max_iter)
	perceptron:train()

	-- Switch to the test features before applying the trained model.
	perceptron:set_features(feats_test)
	local out_labels = perceptron:apply():get_labels()

	return perceptron, out_labels
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly, not loaded by another
-- chunk" check (presumably tuned to the standalone interpreter).
if debug.getinfo(3) == nil then
	print 'AveragedPerceptron'
	classifier_averaged_perceptron_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/classifier_libsvm_minimal_modular.lua

-  In this example a two-class support vector machine classifier is trained on a
-  2-dimensional randomly generated data set and the trained classifier is used to
-  predict labels of test examples. As training algorithm the LIBSVM solver is used
-  with SVM regularization parameter C=1 and a Gaussian kernel of width 2.1.
-  
-  For more details on LIBSVM solver see http://www.csie.ntu.edu.tw/~cjlin/libsvm/

require 'modshogun'
require 'load'

-- Append the columns of every further matrix to the rows of the first one.
-- The first argument is extended in place and is also the return value;
-- row r of each additional matrix is appended to row r of the first.
function concatenate(...)
	local merged = ...
	local others = {select(2, ...)}
	for _, matrix in ipairs(others) do
		for r, cells in ipairs(matrix) do
			local target = merged[r]
			for _, cell in ipairs(cells) do
				table.insert(target, cell)
			end
		end
	end
	return merged
end

-- Build a rows x cols table of tables whose entries are math.random() + dist.
-- Values are drawn row by row, left to right.
function rand_matrix(rows, cols, dist)
	local result = {}
	for r = 1, rows do
		local line = {}
		for c = 1, cols do
			line[c] = math.random() + dist
		end
		result[r] = line
	end
	return result
end

-- Return a new array holding `num` entries that are all 1.
function ones(num)
	-- Local accumulator: the original wrote to a global named `r`.
	local r = {}
	for i = 1, num do
		r[i] = 1
	end
	return r
end


-- Experiment settings: points per class, class offset, kernel width, SVM C.
num, dist, width, C = 1000, 1, 2.1, 1

-- Two point clouds per data set: the first `num` columns are shifted by -dist,
-- the next `num` columns by +dist. Train data is drawn before test data.
local train_neg = rand_matrix(2, num, -dist)
local train_pos = rand_matrix(2, num, dist)
traindata_real = concatenate(train_neg, train_pos)

local test_neg = rand_matrix(2, num, -dist)
local test_pos = rand_matrix(2, num, dist)
testdata_real = concatenate(test_neg, test_pos)

-- Labels follow the column layout: -1 for the first half, +1 for the second.
trainlab, testlab = {}, {}
for i = 1, num do
	trainlab[i], trainlab[i + num] = -1, 1
	testlab[i], testlab[i + num] = -1, 1
end

feats_train = modshogun.RealFeatures(traindata_real)
feats_test = modshogun.RealFeatures(testdata_real)
kernel = modshogun.GaussianKernel(feats_train, feats_train, width)

labels = modshogun.Labels(trainlab)
svm = modshogun.LibSVM(C, kernel, labels)
svm:train()

-- Re-initialise the kernel on (train, test) before predicting.
kernel:init(feats_train, feats_test)
out = svm:apply():get_labels()

-- Count predictions whose sign disagrees with the expected class.
err_num = 0
for i = 1, 2 * num do
	local wrong
	if i <= num then
		wrong = out[i] > 0
	else
		wrong = out[i] < 0
	end
	if wrong then
		err_num = err_num + 1
	end
end

testerr = err_num / (2 * num)
print(testerr)

Distance

../examples/documented/lua_modular/distance_braycurtis_modular.lua

-  The approach applied below shows how input data is read from files, which
-  is a crucial step when writing your own sample applications.
-  It is just one example of what can be done using the distance
-  functions provided by shogun.
-  
-  First, you need to determine what type your data will be, because this
-  will determine the distance function you can use.
-  
-  This example loads two stored matrices of real values from different 
-  files and initializes the matrices to 'RealFeatures'.
-  Each column of the matrices corresponds to one data point.
-  
-  The distance initialized by two data sets (the same data set as shown in the 
-  first call) controls the processing of the given data points, where a pairwise 
-  distance matrix is computed by 'get_distance_matrix'.
-  
-  The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
-  
-  The method call 'init'* binds the given data sets, where a pairwise distance 
-  matrix between these two data sets is computed by 'get_distance_matrix'.
-  
-  The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
-  
-  *Note that the previous computed distance matrix can no longer be 
-  reaccessed by 'get_distance_matrix'.
-  
-  For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
-  
-  Obviously, using the Bray Curtis distance is not limited to this showcase 
-  example.

require 'modshogun'
require 'load'

-- Toy data files, read through the helper provided by the 'load' module.
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')

-- One entry per run: {fm_train_real, fm_test_real}
parameter_list = {
	{traindat, testdat},
	{traindat, testdat},
}

-- Compute Bray-Curtis distance matrices for train/train and train/test features.
--   fm_train_real, fm_test_real: feature matrices handed to RealFeatures
-- Returns the distance object, the train/train matrix and the train/test matrix.
function distance_braycurtis_modular (fm_train_real,fm_test_real)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.RealFeatures(fm_train_real)
	local feats_test = modshogun.RealFeatures(fm_test_real)

	local distance = modshogun.BrayCurtisDistance(feats_train, feats_train)
	local dm_train = distance:get_distance_matrix()

	-- Re-bind the distance to (train, test) before fetching the second matrix.
	distance:init(feats_train, feats_test)
	local dm_test = distance:get_distance_matrix()

	return distance,dm_train,dm_test
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'BrayCurtisDistance'
	distance_braycurtis_modular(unpack(parameter_list[1]))
end

Evaluation

../examples/documented/lua_modular/evaluation_contingencytableevaluation_modular.lua

-  In this example various (accuracy, error rate, ..) measures are being computed
-  for the pair of ground truth toy data and random data. 

require 'modshogun'
require 'load'

-- Ground truth comes from the toy label file; the "predictions" are seeded
-- random numbers, one per ground-truth entry.
ground_truth = load_labels('../data/label_train_twoclass.dat')
math.randomseed(17)

predicted = {}
for i = 1, #ground_truth do
	predicted[i] = math.random()
end

-- One entry per run: {ground_truth, predicted}
parameter_list = {
	{ground_truth, predicted},
}

-- Evaluate a set of contingency-table measures on predicted vs. ground-truth labels.
--   ground_truth, predicted: label arrays handed to Labels
-- Returns accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall,
-- precision and specificity, in that order.
function evaluation_contingencytableevaluation_modular(ground_truth, predicted)
	-- Intermediates are local so that calling the example does not write into _G.
	local ground_truth_labels = modshogun.Labels(ground_truth)
	local predicted_labels = modshogun.Labels(predicted)

	-- Every evaluator is applied to the same (predicted, ground truth) pair.
	local function score(evaluator)
		return evaluator:evaluate(predicted_labels, ground_truth_labels)
	end

	-- The base evaluator is exercised too; its result is not part of the return.
	score(modshogun.ContingencyTableEvaluation())

	local accuracy = score(modshogun.AccuracyMeasure())
	local errorrate = score(modshogun.ErrorRateMeasure())
	local bal = score(modshogun.BALMeasure())
	local wracc = score(modshogun.WRACCMeasure())
	local f1 = score(modshogun.F1Measure())
	local crosscorrelation = score(modshogun.CrossCorrelationMeasure())
	local recall = score(modshogun.RecallMeasure())
	local precision = score(modshogun.PrecisionMeasure())
	local specificity = score(modshogun.SpecificityMeasure())

	return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'ContingencyTableEvaluation'
	evaluation_contingencytableevaluation_modular(unpack(parameter_list[1]))
end

Features

../examples/documented/lua_modular/features_simple_real_modular.lua

-  This example demonstrates how to encode real-valued features in Shogun,
-  using RealFeatures.

require 'modshogun'

-- Input table handed to RealFeatures: six inner tables of three numbers each.
matrix = {
	{1, 2, 3},
	{4, 0, 0},
	{0, 0, 0},
	{0, 5, 0},
	{0, 0, 6},
	{9, 9, 9},
}

-- One entry per run: {A}
parameter_list = {
	{matrix},
}

-- Wrap a table of numbers in RealFeatures, overwrite feature vector 0 and
-- return the resulting feature matrix.
--   A: table of tables of numbers handed to RealFeatures
function features_simple_real_modular(A)
	-- Intermediates are local so that calling the example does not write into _G.
	local a = modshogun.RealFeatures(A)
	a:set_feature_vector({1,4,0,0,0,9}, 0)

	local a_out = a:get_feature_matrix()

	return a_out
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'simple_real'
	features_simple_real_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/features_string_char_modular.lua

-  This example demonstrates how to encode ASCII-strings (255 symbols) in shogun.

require 'modshogun'
require 'load'

-- Sample strings handed to StringCharFeatures.
strings = {'hey', 'guys', 'i', 'am', 'a', 'string'}

-- One entry per run: {strings}
parameter_list = {
	{strings},
}

-- Print the input strings, wrap them in RAWBYTE StringCharFeatures, report
-- basic statistics and overwrite feature vector 0.
--   strings: array of strings
-- Returns the features' string list and the features object.
function features_string_char_modular(strings)
	-- ipairs visits the array in index order; pairs gives no ordering guarantee.
	for _, v in ipairs(strings) do print(v) end
	-- Local so that calling the example does not write into _G.
	local f = modshogun.StringCharFeatures(strings, modshogun.RAWBYTE)

	print("max string length " ..f:get_max_vector_length())
	print("number of strings " .. f:get_num_vectors())
	--print ("length of first string" ..f:get_vector_length(0))
	--print ("strings" .. f:get_features())

	f:set_feature_vector({"t","e","s","t"}, 0)

	return f:get_features(), f
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'StringCharFeatures'
	features_string_char_modular(unpack(parameter_list[1]))
end

Kernel

../examples/documented/lua_modular/kernel_comm_ulong_string_modular.lua

-  This is an example for the initialization of the CommUlongString-kernel. This kernel 
-  sums over k-mer matches (k='order'). For efficient computing a preprocessor is used 
-  that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mer is counted 
-  only once. 

require 'modshogun'
require 'load'

-- DNA toy data, read through the helper provided by the 'load' module.
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

-- One entry per run: {fm_train_dna, fm_test_dna, order, gap, reverse}
parameter_list = {
	{traindat, testdat, 3, 0, false},
	{traindat, testdat, 4, 0, false},
}

-- Convert DNA strings to StringUlongFeatures, run them through a
-- SortUlongString preprocessor and compute CommUlongStringKernel matrices.
--   fm_train_dna, fm_test_dna: DNA string data handed to StringCharFeatures
--   order, gap, reverse: forwarded to obtain_from_char (start is order-1)
-- Returns the train/train kernel matrix, the train/test kernel matrix and the kernel.
function kernel_comm_ulong_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse)
	-- Intermediates are local so that calling the example does not write into _G.
	local charfeat = modshogun.StringCharFeatures(modshogun.DNA)
	charfeat:set_features(fm_train_dna)
	local feats_train = modshogun.StringUlongFeatures(charfeat:get_alphabet())
	feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)

	-- The preprocessor is initialised on the training features only and then
	-- reused for the test features.
	local preproc = modshogun.SortUlongString()
	preproc:init(feats_train)
	feats_train:add_preprocessor(preproc)
	feats_train:apply_preprocessor()

	charfeat = modshogun.StringCharFeatures(modshogun.DNA)
	charfeat:set_features(fm_test_dna)
	local feats_test = modshogun.StringUlongFeatures(charfeat:get_alphabet())
	feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test:add_preprocessor(preproc)
	feats_test:apply_preprocessor()

	-- use_sign is fixed for this example rather than taken from the arguments.
	local use_sign = false

	local kernel = modshogun.CommUlongStringKernel(feats_train, feats_train, use_sign)

	local km_train = kernel:get_kernel_matrix()
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()
	return km_train,km_test,kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'CommUlongString'
	kernel_comm_ulong_string_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/kernel_comm_word_string_modular.lua

-  This is an example for the initialization of the CommWordString-kernel (aka
-  Spectrum or n-gram kernel; its name is derived from the unix command comm). This kernel 
-  sums over k-mer matches (k='order'). For efficient computing a preprocessor is used 
-  that extracts and sorts all k-mers. If 'use_sign' is set to one each k-mer is counted 
-  only once. 

require 'modshogun'
require 'load'

-- DNA toy data, read through the helper provided by the 'load' module.
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

-- One entry per run: {fm_train_dna, fm_test_dna, order, gap, reverse, use_sign}
-- The booleans must be lowercase `false`: `False` is just an undefined global
-- (nil) and would silently shorten the second entry to four values.
parameter_list = {
	{traindat, testdat, 4, 0, false, false},
	{traindat, testdat, 4, 0, false, false},
}

-- Convert DNA strings to StringWordFeatures, run them through a
-- SortWordString preprocessor and compute CommWordStringKernel matrices.
--   fm_train_dna, fm_test_dna: DNA string data handed to StringCharFeatures
--   order, gap, reverse: forwarded to obtain_from_char (start is order-1)
--   use_sign: forwarded to the CommWordStringKernel constructor
-- Returns the train/train kernel matrix, the train/test kernel matrix and the kernel.
function kernel_comm_word_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse, use_sign)
	-- Intermediates are local so that calling the example does not write into _G.
	local charfeat = modshogun.StringCharFeatures(modshogun.DNA)
	charfeat:set_features(fm_train_dna)
	local feats_train = modshogun.StringWordFeatures(charfeat:get_alphabet())
	feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)

	-- The preprocessor is initialised on the training features only and then
	-- reused for the test features.
	local preproc = modshogun.SortWordString()
	preproc:init(feats_train)
	feats_train:add_preprocessor(preproc)
	feats_train:apply_preprocessor()

	charfeat = modshogun.StringCharFeatures(modshogun.DNA)
	charfeat:set_features(fm_test_dna)
	local feats_test = modshogun.StringWordFeatures(charfeat:get_alphabet())
	feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
	feats_test:add_preprocessor(preproc)
	feats_test:apply_preprocessor()

	local kernel = modshogun.CommWordStringKernel(feats_train, feats_train, use_sign)

	local km_train = kernel:get_kernel_matrix()
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()
	return km_train,km_test,kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'CommWordString'
	kernel_comm_word_string_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/kernel_gaussian_modular.lua

-  The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.

require 'modshogun'
require 'load'

-- Toy data files, read through the helper provided by the 'load' module.
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')

-- One entry per run: {fm_train_real, fm_test_real, width}
parameter_list = {
	{traindat, testdat, 1.3},
	{traindat, testdat, 1.4},
}

-- Compute Gaussian kernel matrices for train/train and train/test features.
--   fm_train_real, fm_test_real: feature matrices handed to RealFeatures
--   width: forwarded to the GaussianKernel constructor
-- Returns the train/train kernel matrix, the train/test kernel matrix and the kernel.
function kernel_gaussian_modular (fm_train_real,fm_test_real,width)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.RealFeatures(fm_train_real)
	local feats_test = modshogun.RealFeatures(fm_test_real)

	local kernel = modshogun.GaussianKernel(feats_train, feats_train, width)
	local km_train = kernel:get_kernel_matrix()

	-- Re-initialise on (train, test) before fetching the second matrix.
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()

	return km_train,km_test,kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'Gaussian'
	kernel_gaussian_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/kernel_linear_byte_modular.lua

-  This is an example for the initialization of a linear kernel on raw byte
-  data. 

require 'modshogun'
require 'load'

-- Byte toy data files, read through the helper provided by the 'load' module.
traindat = load_numbers('../data/fm_train_byte.dat')
testdat = load_numbers('../data/fm_test_byte.dat')

-- One entry per run: {fm_train_byte, fm_test_byte}
parameter_list = {
	{traindat, testdat},
	{traindat, testdat},
}

-- Build a LinearKernel on ByteFeatures and compute its train/train and
-- train/test kernel matrices.
--   fm_train_byte, fm_test_byte: feature matrices handed to ByteFeatures
-- Returns the kernel only; the matrices are computed but not returned.
function kernel_linear_byte_modular(fm_train_byte,fm_test_byte)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.ByteFeatures(fm_train_byte)
	local feats_test = modshogun.ByteFeatures(fm_test_byte)

	local kernel = modshogun.LinearKernel(feats_train, feats_train)
	local km_train = kernel:get_kernel_matrix()

	-- Re-initialise on (train, test) before fetching the second matrix.
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()
	return kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'LinearByte'
	kernel_linear_byte_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/kernel_linear_word_modular.lua

-  This is an example for the initialization of a linear kernel on word (2byte) 
-  data. 

require 'modshogun'
require 'load'

-- Word toy data files, read through the helper provided by the 'load' module.
traindat = load_numbers('../data/fm_train_word.dat')
testdat = load_numbers('../data/fm_test_word.dat')

-- One entry per run: {fm_train_word, fm_test_word, scale}
parameter_list = {
	{traindat, testdat, 1.2},
	{traindat, testdat, 1.2},
}

-- Build a LinearKernel on WordFeatures with an AvgDiagKernelNormalizer and
-- compute its train/train and train/test kernel matrices.
--   fm_train_word, fm_test_word: feature matrices handed to WordFeatures
--   scale: forwarded to the AvgDiagKernelNormalizer constructor
-- Returns the kernel only; the matrices are computed but not returned.
function kernel_linear_word_modular (fm_train_word,fm_test_word,scale)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.WordFeatures(fm_train_word)
	local feats_test = modshogun.WordFeatures(fm_test_word)

	local kernel = modshogun.LinearKernel(feats_train, feats_train)
	kernel:set_normalizer(modshogun.AvgDiagKernelNormalizer(scale))
	-- The kernel is initialised again after the normalizer has been installed.
	kernel:init(feats_train, feats_train)

	local km_train = kernel:get_kernel_matrix()
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()
	return kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'LinearWord'
	kernel_linear_word_modular(unpack(parameter_list[1]))
end

../examples/documented/lua_modular/kernel_weighted_degree_string_modular.lua

-  This example shows how to create a Weighted Degree String Kernel from data
-  and how to compute the kernel matrix from the resulting object.

require 'modshogun'
require 'load'

-- DNA toy data, read through the helper provided by the 'load' module.
traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

-- One entry per run: {fm_train_dna, fm_test_dna, degree}
parameter_list = {
	{traindat, testdat, 3},
	{traindat, testdat, 20},
}

-- Build a WeightedDegreeStringKernel on DNA strings, install explicit degree
-- weights and compute its train/train and train/test kernel matrices.
--   fm_train_dna, fm_test_dna: DNA string data handed to StringCharFeatures
--   degree: kernel degree; also the number of weights generated
-- Returns the train/train kernel matrix, the train/test kernel matrix and the kernel.
function kernel_weighted_degree_string_modular (fm_train_dna,fm_test_dna,degree)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.StringCharFeatures(fm_train_dna, modshogun.DNA)
	local feats_test = modshogun.StringCharFeatures(fm_test_dna, modshogun.DNA)

	local kernel = modshogun.WeightedDegreeStringKernel(feats_train, feats_train, degree)

	-- Weights 2*i/((degree+1)*degree) for i = degree down to 1, i.e. a
	-- decreasing sequence of `degree` values.
	local weights = {}
	for i = degree, 1, -1 do
		weights[#weights + 1] = 2*i/((degree+1)*degree)
	end
	kernel:set_wd_weights(weights)

	local km_train = kernel:get_kernel_matrix()
	kernel:init(feats_train, feats_test)
	local km_test = kernel:get_kernel_matrix()

	return km_train, km_test, kernel
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'WeightedDegreeString'
	kernel_weighted_degree_string_modular(unpack(parameter_list[1]))
end

Preprocessor

../examples/documented/lua_modular/preprocessor_isomap_modular.lua

-  In this example toy data is being processed using the Isomap algorithm
-  as described in
-  
-  Silva, V. D., & Tenenbaum, J. B. (2003). 
-  Global versus local methods in nonlinear dimensionality reduction. 
-  Advances in Neural Information Processing Systems 15, 15(Figure 2), 721-728. MIT Press. 
-  Retrieved from http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.9.3407&rep=rep1&type=pdf
-  
-  Before applying to the data the landmark approximation is enabled with 
-  specified number of landmarks. The landmark approximation is described in
-  
-  Sparse multidimensional scaling using landmark points
-  V De Silva, J B Tenenbaum (2004) Technology, p. 1-4
-  
-  After enabling the landmark approximation k parameter -- the number 
-  of neighbors in the k nearest neighbor graph -- is initialized.

require 'modshogun'
require 'load'

-- Toy data file, read through the helper provided by the 'load' module.
data = load_numbers('../data/fm_train_real.dat')

-- One entry per run: {data}
parameter_list = {
	{data},
}

-- Apply an Isomap preprocessor with target dimension 1 to the given data.
--   data: feature matrix handed to RealFeatures
-- Returns the RealFeatures object that apply_to_feature_matrix was called on.
function preprocessor_isomap_modular(data)
	-- Intermediates are local so that calling the example does not write into _G.
	local features = modshogun.RealFeatures(data)

	local preprocessor = modshogun.Isomap()
	preprocessor:set_target_dim(1)
	-- The value returned by apply_to_feature_matrix is not used here.
	preprocessor:apply_to_feature_matrix(features)

	return features
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'Isomap'
	preprocessor_isomap_modular(unpack(parameter_list[1]))
end

Regression

../examples/documented/lua_modular/regression_krr_modular.lua

-  In this example a kernelized version of ridge regression (KRR) is trained on a
-  real-valued data set. The KRR is trained with regularization parameter tau=1e-6
-  and a gaussian kernel with width=0.8. The labels of both the train and the test
-  data can be fetched via krr:apply():get_labels().

require 'modshogun'
require 'load'

-- Toy data files, read through the helpers provided by the 'load' module.
traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
label_traindat = load_labels('../data/label_train_twoclass.dat')

-- One entry per run: {fm_train, fm_test, label_train, width, tau}
parameter_list = {
	{traindat, testdat, label_traindat, 0.8, 1e-6},
	{traindat, testdat, label_traindat, 0.9, 1e-7},
}

-- Train kernel ridge regression with a Gaussian kernel and apply it to test data.
--   fm_train, fm_test: feature matrices handed to RealFeatures
--   label_train: training labels handed to Labels
--   width: forwarded to the GaussianKernel constructor
--   tau: forwarded to the KRR constructor
-- Returns the labels produced for the test features, the kernel and the KRR machine.
function regression_krr_modular (fm_train,fm_test,label_train,width,tau)
	-- Intermediates are local so that calling the example does not write into _G.
	local feats_train = modshogun.RealFeatures(fm_train)
	local feats_test = modshogun.RealFeatures(fm_test)

	local kernel = modshogun.GaussianKernel(feats_train, feats_train, width)
	local labels = modshogun.Labels(label_train)

	local krr = modshogun.KRR(tau, kernel, labels)
	krr:train(feats_train)

	-- Re-initialise the kernel on (train, test) before predicting.
	kernel:init(feats_train, feats_test)
	local out = krr:apply():get_labels()
	return out,kernel,krr
end

-- Same run-as-main check as the other examples: debug.getinfo returns nil for
-- a stack level that does not exist, so the example body is skipped when a
-- third stack level is present (i.e. the file was loaded by another chunk).
if debug.getinfo(3) == nil then
	print 'KRR'
	regression_krr_modular(unpack(parameter_list[1]))
end

Serialization

../examples/documented/lua_modular/serialization_complex_example.lua

-  In this example serialization of SVM (Support Vector Machine) is shown

require 'os'
require 'modshogun'
require 'load'

-- One entry per run, in the argument order of serialization_complex_example:
-- {num, dist, dim, C, width}
parameter_list = {
	{5, 1, 10, 2.0, 10},
	{10, 0.3, 2, 1.0, 0.1},
}

-- Raise an error unless a save/load call reported success.
--   status: result of save_serializable / load_serializable; must be exactly true
function check_status(status)
	-- The message replaces the generic "assertion failed!" text.
	assert(status == true, "reading/writing serialized file failed")
end

-- Append the columns of every further matrix to the rows of the first one.
-- The first argument is extended in place and is also the return value;
-- row r of each additional matrix is appended to row r of the first.
function concatenate(...)
	local merged = ...
	local others = {select(2, ...)}
	for _, matrix in ipairs(others) do
		for r, cells in ipairs(matrix) do
			local target = merged[r]
			for _, cell in ipairs(cells) do
				table.insert(target, cell)
			end
		end
	end
	return merged
end

-- Build a rows x cols table of tables whose entries are math.random() + dist.
-- Values are drawn row by row, left to right.
function rand_matrix(rows, cols, dist)
	local result = {}
	for r = 1, rows do
		local line = {}
		for c = 1, cols do
			line[c] = math.random() + dist
		end
		result[r] = line
	end
	return result
end

-- Build the label array for four equally sized classes: `num` entries each of
-- 0, 1, 2 and 3, in that order (4*num entries in total).
function generate_lab(num)
	-- Local accumulator: the original wrote to a global named `lab`.
	local lab = {}
	for class = 0, 3 do
		-- Class c occupies indices c*num+1 .. (c+1)*num.
		for i = class * num + 1, (class + 1) * num do
			lab[i] = class
		end
	end
	return lab
end

-- Train a GMNPSVM on four shifted random point clouds, save it in four
-- serialization formats, load each file back into a fresh GMNPSVM and retrain it.
--   num: points per class
--   dist: offset step between consecutive classes (0, dist, 2*dist, 3*dist)
--   dim: number of rows in the generated data matrix
--   C: forwarded to the GMNPSVM constructor
--   width: forwarded to the GaussianKernel constructor
-- Returns the originally trained svm and the machine loaded last (from XML).
function serialization_complex_example(num, dist, dim, C, width)

	math.randomseed(17)

	-- Intermediates are local so that calling the example does not write into _G.
	local data = concatenate(rand_matrix(dim, num, 0), rand_matrix(dim, num, dist), rand_matrix(dim, num, 2 * dist), rand_matrix(dim, num, 3 * dist))

	local lab = generate_lab(num)

	local feats = modshogun.RealFeatures(data)
	local kernel = modshogun.GaussianKernel(feats, feats, width)

	local labels = modshogun.Labels(lab)

	local svm = modshogun.GMNPSVM(C, kernel, labels)

	feats:add_preprocessor(modshogun.NormOne())
	feats:add_preprocessor(modshogun.LogPlusOne())
	feats:set_preprocessed(1)
	svm:train(feats)

	-- Save the trained machine to an already opened serialization stream.
	local function store(fstream)
		check_status(svm:save_serializable(fstream))
	end

	-- Load a fresh machine from a stream and train it again.
	local function restore(fstream)
		local loaded = modshogun.GMNPSVM()
		check_status(loaded:load_serializable(fstream))
		loaded:train()
		return loaded
	end

	store(modshogun.SerializableHdf5File("blaah.h5", "w"))
	store(modshogun.SerializableAsciiFile("blaah.asc", "w"))
	store(modshogun.SerializableJsonFile("blaah.json", "w"))
	store(modshogun.SerializableXmlFile("blaah.xml", "w"))

	-- Each load replaces the previous result; the XML one is what gets returned.
	local new_svm
	new_svm = restore(modshogun.SerializableHdf5File("blaah.h5", "r"))
	new_svm = restore(modshogun.SerializableAsciiFile("blaah.asc", "r"))
	new_svm = restore(modshogun.SerializableJsonFile("blaah.json", "r"))
	new_svm = restore(modshogun.SerializableXmlFile("blaah.xml", "r"))

	-- Clean up the temporary files (once each; the original removed them twice).
	for _, filename in ipairs({"blaah.h5", "blaah.asc", "blaah.json", "blaah.xml"}) do
		os.remove(filename)
	end

	return svm,new_svm
end

-- debug.getinfo returns nil for a stack level that does not exist; the example
-- uses the absence of level 3 as its "run directly" check.
if debug.getinfo(3) == nil then
	print 'Serialization SVMLight'
	serialization_complex_example(unpack(parameter_list[1]))
end