SHOGUN
4.2.0
|
This page lists ready-to-run Shogun examples for the Lua Modular interface.
To run the examples, issue:
lua name_of_example.lua
-- In this example the Averaged Perceptron is used to classify toy data.
require 'modshogun'
require 'load'

traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')
label_traindat = load_labels('../data/label_train_twoclass.dat')

parameter_list = {{traindat,testdat,label_traindat,1.,1000,1},{traindat,testdat,label_traindat,1.,100,1}}

-- Train an AveragedPerceptron on dense real-valued features and classify
-- the test features.
-- Params: train/test feature matrices, two-class training labels,
--         learning rate, maximum iterations, thread count (unused here).
-- Returns: the trained perceptron and the predicted test labels.
function classifier_averaged_perceptron_modular (fm_train_real,fm_test_real,label_train_twoclass,learn_rate,max_iter,num_threads)
	feats_train=modshogun.RealFeatures(fm_train_real)
	feats_test=modshogun.RealFeatures(fm_test_real)

	labels=modshogun.BinaryLabels(label_train_twoclass)

	perceptron=modshogun.AveragedPerceptron(feats_train, labels)
	perceptron:set_learn_rate(learn_rate)
	perceptron:set_max_iter(max_iter)
	perceptron:train()

	-- swap in the test features before applying the trained model
	perceptron:set_features(feats_test)
	out_labels = perceptron:apply():get_labels()

	return perceptron, out_labels
end

-- Run only when executed directly (was `== nill`, an undeclared global that
-- merely happened to evaluate to nil).
if debug.getinfo(3) == nil then
	print 'AveragedPerceptron'
	classifier_averaged_perceptron_modular(unpack(parameter_list[1]))
end
-- An approach as applied below, which shows the processing of input data
-- read from a file, is a crucial building block for writing your own sample
-- applications. It is just one example of what can be done using the
-- distance functions provided by shogun.
--
-- First, you need to determine what type your data will be, because this
-- determines which distance functions you can use.
--
-- This example loads two stored matrices of real values from different
-- files and initializes the matrices to 'RealFeatures'.
-- Each column of the matrices corresponds to one data point.
--
-- The distance initialized with two identical data sets (as shown in the
-- first call) controls the processing of the given data points: a pairwise
-- distance matrix is computed by 'get_distance_matrix'.
--
-- The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
--
-- The method call 'init'* binds the given data sets, and a pairwise
-- distance matrix between these two data sets is computed by
-- 'get_distance_matrix'.
--
-- The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
--
-- *Note that the previously computed distance matrix can no longer be
-- reaccessed by 'get_distance_matrix'.
--
-- For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
--
-- Obviously, using the Bray-Curtis distance is not limited to this
-- showcase example.
require 'modshogun'
require 'load'

traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')

parameter_list = {{traindat,testdat},{traindat,testdat}}

-- Compute Bray-Curtis distance matrices for train/train and train/test
-- feature pairs.
-- Returns: the distance object plus both distance matrices.
function distance_braycurtis_modular (fm_train_real,fm_test_real)
	feats_train=modshogun.RealFeatures(fm_train_real)
	feats_test=modshogun.RealFeatures(fm_test_real)

	distance=modshogun.BrayCurtisDistance(feats_train, feats_train)
	dm_train=distance:get_distance_matrix()

	-- re-initializing discards the previously computed train/train matrix
	distance:init(feats_train, feats_test)
	dm_test=distance:get_distance_matrix()

	return distance,dm_train,dm_test
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'BrayCurtisDistance'
	distance_braycurtis_modular(unpack(parameter_list[1]))
end
-- In this example various measures (accuracy, error rate, ...) are computed
-- for the pair of ground-truth toy labels and random predictions.
require 'modshogun'
require 'load'

ground_truth = load_labels('../data/label_train_twoclass.dat')

math.randomseed(17)
predicted = {}
for i = 1, #ground_truth do
	table.insert(predicted, math.random())
end

parameter_list = {{ground_truth,predicted}}

-- Evaluate a collection of contingency-table-based measures for the given
-- ground-truth and predicted label vectors.
-- Returns: accuracy, error rate, BAL, WRACC, F1, cross-correlation, recall,
--          precision and specificity.
function evaluation_contingencytableevaluation_modular(ground_truth, predicted)
	ground_truth_labels = modshogun.BinaryLabels(ground_truth)
	predicted_labels = modshogun.BinaryLabels(predicted)

	base_evaluator = modshogun.ContingencyTableEvaluation()
	base_evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.AccuracyMeasure()
	accuracy = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.ErrorRateMeasure()
	errorrate = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.BALMeasure()
	bal = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.WRACCMeasure()
	wracc = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.F1Measure()
	f1 = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.CrossCorrelationMeasure()
	crosscorrelation = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.RecallMeasure()
	recall = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.PrecisionMeasure()
	precision = evaluator:evaluate(predicted_labels,ground_truth_labels)

	evaluator = modshogun.SpecificityMeasure()
	specificity = evaluator:evaluate(predicted_labels,ground_truth_labels)

	return accuracy, errorrate, bal, wracc, f1, crosscorrelation, recall, precision, specificity
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'ContingencyTableEvaluation'
	evaluation_contingencytableevaluation_modular(unpack(parameter_list[1]))
end
require 'modshogun'

matrix = {{1,2,3},{4,0,0},{0,0,0},{0,5,0},{0,0,6},{9,9,9}}

parameter_list = {{matrix}}

-- Wrap a dense real-valued matrix in RealFeatures, overwrite feature
-- vector 0, and return the resulting feature matrix.
function features_dense_real_modular(A)
	a=modshogun.RealFeatures(A)

	-- vectors are 0-indexed on the shogun side
	a:set_feature_vector({1,4,0,0,0,9}, 0)

	a_out = a:get_feature_matrix()
	return a_out
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'dense_real'
	features_dense_real_modular(unpack(parameter_list[1]))
end
-- This example demonstrates how to encode ASCII strings (255 symbols) in shogun.
require 'modshogun'
require 'load'

strings = {'hey','guys','i','am','a','string'}

parameter_list={{strings}}

-- Intended to build StringCharFeatures from a table of strings; the body is
-- currently disabled pending fixes (see FIXME markers), so it is a no-op.
function features_string_char_modular(strings)
	--for k, v in pairs(strings) do print(v) end
	--FIXME
	--f=modshogun.StringCharFeatures(strings, modshogun.RAWBYTE)
	--print("max string length " ..f:get_max_vector_length())
	--print("number of strings " .. f:get_num_vectors())
	--print ("length of first string" ..f:get_vector_length(0))
	--print ("strings" .. f:get_features())
	--FIXME
	--f:set_feature_vector({"t","e","s","t"}, 0)
	--return f:get_features(), f
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'StringCharFeatures'
	features_string_char_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of the CommUlongString kernel. This
-- kernel sums over k-mer matches (k='order'). For efficient computation a
-- preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
-- to one, each k-mer is counted only once.
require 'modshogun'
require 'load'

traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

parameter_list = {{traindat,testdat,3,0,false},{traindat,testdat,4,0,false}}

-- Intended to compute CommUlongString kernel matrices for DNA data; the body
-- is currently disabled (commented out), so it is a no-op.
function kernel_comm_ulong_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse)
	--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
	--charfeat:set_features(fm_train_dna)
	--feats_train=modshogun.StringUlongFeatures(charfeat:get_alphabet())
	--feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)
	--preproc=modshogun.SortUlongString()
	--preproc:init(feats_train)
	--feats_train:add_preprocessor(preproc)
	--feats_train:apply_preprocessor()

	--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
	--charfeat:set_features(fm_test_dna)
	--feats_test=modshogun.StringUlongFeatures(charfeat:get_alphabet())
	--feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
	--feats_test:add_preprocessor(preproc)
	--feats_test:apply_preprocessor()

	--use_sign=false

	--kernel=modshogun.CommUlongStringKernel(feats_train, feats_train, use_sign)
	--km_train=kernel:get_kernel_matrix()

	--kernel:init(feats_train, feats_test)
	--km_test=kernel:get_kernel_matrix()
	--return km_train,km_test,kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'CommUlongString'
	kernel_comm_ulong_string_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of the CommWordString kernel (aka
-- Spectrum or n-gram kernel; its name is derived from the unix command comm).
-- This kernel sums over k-mer matches (k='order'). For efficient computation a
-- preprocessor is used that extracts and sorts all k-mers. If 'use_sign' is set
-- to one, each k-mer is counted only once.
require 'modshogun'
require 'load'

traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

-- Second entry previously used Python-style `False`, an undeclared Lua global
-- that evaluates to nil; corrected to proper booleans.
parameter_list = {{traindat,testdat,4,0,false,false},{traindat,testdat,4,0,false,false}}

-- Intended to compute CommWordString kernel matrices for DNA data; the body
-- is currently disabled (commented out), so it is a no-op.
function kernel_comm_word_string_modular (fm_train_dna,fm_test_dna, order, gap, reverse, use_sign)
	--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
	--charfeat:set_features(fm_train_dna)
	--feats_train=modshogun.StringWordFeatures(charfeat:get_alphabet())
	--feats_train:obtain_from_char(charfeat, order-1, order, gap, reverse)
	--
	--preproc=modshogun.SortWordString()
	--preproc:init(feats_train)
	--feats_train:add_preprocessor(preproc)
	--feats_train:apply_preprocessor()
	--
	--charfeat=modshogun.StringCharFeatures(modshogun.DNA)
	--charfeat:set_features(fm_test_dna)
	--feats_test=modshogun.StringWordFeatures(charfeat:get_alphabet())
	--feats_test:obtain_from_char(charfeat, order-1, order, gap, reverse)
	--feats_test:add_preprocessor(preproc)
	--feats_test:apply_preprocessor()
	--
	--kernel=modshogun.CommWordStringKernel(feats_train, feats_train, use_sign)
	--
	--km_train=kernel:get_kernel_matrix()
	--kernel:init(feats_train, feats_test)
	--km_test=kernel:get_kernel_matrix()
	--return km_train,km_test,kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'CommWordString'
	kernel_comm_word_string_modular(unpack(parameter_list[1]))
end
-- The well-known Gaussian kernel (swiss army knife for SVMs) on dense
-- real-valued features.
require 'modshogun'
require 'load'

traindat = load_numbers('../data/fm_train_real.dat')
testdat = load_numbers('../data/fm_test_real.dat')

parameter_list = {{traindat,testdat, 1.3},{traindat,testdat, 1.4}}

-- Compute Gaussian kernel matrices (train/train and train/test) for the
-- given kernel width.
-- Returns: both kernel matrices and the kernel object.
function kernel_gaussian_modular (fm_train_real,fm_test_real,width)
	feats_train=modshogun.RealFeatures(fm_train_real)
	feats_test=modshogun.RealFeatures(fm_test_real)

	kernel=modshogun.GaussianKernel(feats_train, feats_train, width)
	km_train=kernel:get_kernel_matrix()

	kernel:init(feats_train, feats_test)
	km_test=kernel:get_kernel_matrix()

	return km_train,km_test,kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'Gaussian'
	kernel_gaussian_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of a linear kernel on raw byte
-- data.
require 'modshogun'
require 'load'

traindat = load_numbers('../data/fm_train_byte.dat')
testdat = load_numbers('../data/fm_test_byte.dat')

parameter_list={{traindat,testdat},{traindat,testdat}}

-- Build a linear kernel on byte features; kernel matrices are computed for
-- their side effects, and the kernel object is returned.
function kernel_linear_byte_modular(fm_train_byte,fm_test_byte)
	feats_train=modshogun.ByteFeatures(fm_train_byte)
	feats_test=modshogun.ByteFeatures(fm_test_byte)

	kernel=modshogun.LinearKernel(feats_train, feats_train)
	km_train=kernel:get_kernel_matrix()

	kernel:init(feats_train, feats_test)
	km_test=kernel:get_kernel_matrix()

	return kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'LinearByte'
	kernel_linear_byte_modular(unpack(parameter_list[1]))
end
-- This is an example for the initialization of a linear kernel on word (2-byte)
-- data.
require 'modshogun'
require 'load'

traindat = load_numbers('../data/fm_train_word.dat')
testdat = load_numbers('../data/fm_test_word.dat')

parameter_list={{traindat,testdat,1.2},{traindat,testdat,1.2}}

-- Build a linear kernel on word features with an AvgDiag normalizer using
-- the given scale; returns the kernel object.
function kernel_linear_word_modular (fm_train_word,fm_test_word,scale)
	feats_train=modshogun.WordFeatures(fm_train_word)
	feats_test=modshogun.WordFeatures(fm_test_word)

	kernel=modshogun.LinearKernel(feats_train, feats_train)
	kernel:set_normalizer(modshogun.AvgDiagKernelNormalizer(scale))
	-- re-init after setting the normalizer so it takes effect
	kernel:init(feats_train, feats_train)

	km_train=kernel:get_kernel_matrix()

	kernel:init(feats_train, feats_test)
	km_test=kernel:get_kernel_matrix()

	return kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'LinearWord'
	kernel_linear_word_modular(unpack(parameter_list[1]))
end
-- This example shows how to create a Weighted Degree String Kernel from data
-- and how to compute the kernel matrix from the resulting object.
require 'modshogun'
require 'load'

traindat = load_dna('../data/fm_train_dna.dat')
testdat = load_dna('../data/fm_test_dna.dat')

parameter_list = {{traindat,testdat,3},{traindat,testdat,20}}

-- Intended to compute WeightedDegreeString kernel matrices for DNA data; the
-- body is currently disabled (commented out), so it is a no-op.
function kernel_weighted_degree_string_modular (fm_train_dna,fm_test_dna,degree)
	--feats_train=modshogun.StringCharFeatures(fm_train_dna, modshogun.DNA)
	--feats_test=modshogun.StringCharFeatures(fm_test_dna, modshogun.DNA)
	--
	--kernel=modshogun.WeightedDegreeStringKernel(feats_train, feats_train, degree)
	--
	--weights = {}
	--for i = degree, 1, -1 do
	--table.insert(weights, 2*i/((degree+1)*degree))
	--end
	--kernel:set_wd_weights(weights)
	--
	--km_train=kernel:get_kernel_matrix()
	--kernel:init(feats_train, feats_test)
	--km_test=kernel:get_kernel_matrix()
	--
	--return km_train, km_test, kernel
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'WeightedDegreeString'
	kernel_weighted_degree_string_modular(unpack(parameter_list[1]))
end
-- In this example serialization of an SVM (Support Vector Machine) is shown.
require 'os'
require 'modshogun'
require 'load'

parameter_list={{5,1,10, 2.0, 10}, {10,0.3,2, 1.0, 0.1}}

-- Abort if a serialization read/write operation reported failure.
function check_status(status)
	assert(status == true)
end

-- Horizontally concatenate matrices given as tables of row tables.
-- NOTE(review): the columns of the later matrices are appended into the rows
-- of the FIRST argument, i.e. the first matrix is mutated and then returned.
function concatenate(...)
	local result = ...
	for _,t in ipairs{select(2, ...)} do
		for row,rowdata in ipairs(t) do
			for col,coldata in ipairs(rowdata) do
				table.insert(result[row], coldata)
			end
		end
	end
	return result
end

-- Build a rows x cols matrix of uniform random numbers offset by 'dist'.
function rand_matrix(rows, cols, dist)
	local matrix = {}
	for i = 1, rows do
		matrix[i] = {}
		for j = 1, cols do
			matrix[i][j] = math.random() + dist
		end
	end
	return matrix
end

-- Build a label vector with classes 0..3, 'num' examples per class.
-- (made 'lab' local; it was an accidental global)
function generate_lab(num)
	local lab={}
	for i=1,num do lab[i]=0 end
	for i=num+1,2*num do lab[i]=1 end
	for i=2*num+1,3*num do lab[i]=2 end
	for i=3*num+1,4*num do lab[i]=3 end
	return lab
end

-- Train a GMNPSVM on 4-cluster toy data, serialize it to several formats,
-- deserialize it again, and clean up the files.
-- Params: examples per class, cluster separation, dimensionality,
--         SVM regularization C, Gaussian kernel width.
-- Returns: the original SVM and the last deserialized SVM.
function serialization_complex_example(num, dist, dim, C, width)
	math.randomseed(17)
	data=concatenate(rand_matrix(dim, num, 0),
		rand_matrix(dim, num, dist),
		rand_matrix(dim, num, 2 * dist),
		rand_matrix(dim, num, 3 * dist))
	lab=generate_lab(num)

	feats=modshogun.RealFeatures(data)
	kernel=modshogun.GaussianKernel(feats, feats, width)
	labels=modshogun.MulticlassLabels(lab)

	svm = modshogun.GMNPSVM(C, kernel, labels)

	feats:add_preprocessor(modshogun.NormOne())
	feats:add_preprocessor(modshogun.LogPlusOne())
	feats:set_preprocessed(1)
	svm:train(feats)

	-- save to every supported format (JSON currently disabled)
	fstream = modshogun.SerializableHdf5File("blaah.h5", "w")
	status = svm:save_serializable(fstream)
	check_status(status)

	fstream = modshogun.SerializableAsciiFile("blaah.asc", "w")
	status = svm:save_serializable(fstream)
	check_status(status)

	-- fstream = modshogun.SerializableJsonFile("blaah.json", "w")
	-- status = svm:save_serializable(fstream)
	-- check_status(status)

	fstream = modshogun.SerializableXmlFile("blaah.xml", "w")
	status = svm:save_serializable(fstream)
	check_status(status)

	-- load each format back into a fresh SVM and retrain
	fstream = modshogun.SerializableHdf5File("blaah.h5", "r")
	new_svm=modshogun.GMNPSVM()
	status = new_svm:load_serializable(fstream)
	check_status(status)
	new_svm:train()

	fstream = modshogun.SerializableAsciiFile("blaah.asc", "r")
	new_svm=modshogun.GMNPSVM()
	status = new_svm:load_serializable(fstream)
	check_status(status)
	new_svm:train()

	-- fstream = modshogun.SerializableJsonFile("blaah.json", "r")
	-- new_svm=modshogun.GMNPSVM()
	-- status = new_svm:load_serializable(fstream)
	-- check_status(status)
	-- new_svm:train()

	fstream = modshogun.SerializableXmlFile("blaah.xml", "r")
	new_svm=modshogun.GMNPSVM()
	status = new_svm:load_serializable(fstream)
	check_status(status)
	new_svm:train()

	-- remove the temporary files
	os.remove("blaah.h5")
	os.remove("blaah.asc")
	-- os.remove("blaah.json")
	os.remove("blaah.xml")

	return svm,new_svm
end

-- Run only when executed directly (fixed `nill` typo -> `nil`).
if debug.getinfo(3) == nil then
	print 'Serialization SVMLight'
	serialization_complex_example(unpack(parameter_list[1]))
end