|
SHOGUN
4.2.0
|
This page lists ready-to-run Shogun examples for the Ruby Modular interface.
To run an example, issue
ruby name_of_example.rb
# This example shows how to use a custom defined kernel function for training a
# two class Support Vector Machine (SVM) classifier on randomly generated
# examples. The SVM regularization constant is set to C=1.
require 'modshogun'
require 'load'
require 'pp'

# One argument list (c, dim) per run of classifier_custom_kernel_modular.
parameter_list = [[1,7],[2,8]]

# Trains a LibSVM on a custom kernel matrix built from random data.
#
# c::   SVM regularization constant; also used to seed Shogun's random generator
# dim:: number of random examples (the kernel matrix is dim x dim)
#
# Returns [svm, out]: the trained machine and the labels it predicts.
def classifier_custom_kernel_modular(c=1,dim=7)
  Modshogun::Math.init_random(c)
  NArray.srand(17)

  # Random labels in {-1, +1}; printed for inspection.
  lab = (2*NArray.float(dim).random! - 1).sign
  pp lab

  data = NMatrix.float(dim, dim).random!
  # NOTE(review): symdata is computed but never used; the raw `data` matrix is
  # what gets installed as the kernel below. Confirm which one is intended.
  symdata = data*data.transpose + 10*NMatrix.float(dim,dim).unit

  kernel = Modshogun::CustomKernel.new
  kernel.set_full_kernel_matrix_from_full(data)

  labels = Modshogun::BinaryLabels.new(lab)
  svm = Modshogun::LibSVM.new(c, kernel, labels)
  svm.train()

  # NOTE(review): apply is invoked twice; the first result is unused.
  predictions = svm.apply()
  out = svm.apply().get_labels()
  return svm,out
end

if __FILE__ == $0
  puts 'custom_kernel'
  pp classifier_custom_kernel_modular(*parameter_list[0])
end
# The approach below shows how to process input data read from a file, which
# is a crucial step when writing your own sample applications.
# It is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values from different
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
#
# The distance is first initialized with two data sets (here the training set
# is passed for both), which controls how the given data points are processed;
# the pairwise distance matrix is then computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
#
# The method call 'init'* binds the given data sets, where a pairwise distance
# matrix between these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
#
# *Note that the previously computed distance matrix can no longer be
# reaccessed by 'get_distance_matrix'.
#
# For more details see doc/classshogun_1_1CBrayCurtisDistance.html.
#
# Obviously, using the Bray Curtis distance is not limited to this showcase
# example.
require 'modshogun'
require 'pp'
require 'load'
# Toy train/test matrices used by the Bray-Curtis example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# One argument list per run of distance_braycurtis_modular.
parameter_list = [
  [traindat, testdat],
  [traindat, testdat]
]
# Computes Bray-Curtis distance matrices on dense real-valued features.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
#
# Returns [distance, dm_train, dm_test]: the distance object (left initialised
# on train/test), the train-vs-train matrix and the train-vs-test matrix.
def distance_braycurtis_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                                fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'))
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  distance = Modshogun::BrayCurtisDistance.new(feats_train, feats_train)
  dm_train = distance.get_distance_matrix

  # Re-initialising replaces the train/train matrix with train/test.
  distance.init(feats_train, feats_test)
  dm_test = distance.get_distance_matrix

  return distance, dm_train, dm_test
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'BrayCurtisDistance'
  first_args = parameter_list[0]
  pp distance_braycurtis_modular(*first_args)
end
# The approach below shows how to process input data read from a file, which
# is a crucial step when writing your own sample applications.
# It is just one example of what can be done using the distance
# functions provided by shogun.
#
# First, you need to determine what type your data will be, because this
# will determine the distance function you can use.
#
# This example loads two stored matrices of real values from different
# files and initializes the matrices to 'RealFeatures'.
# Each column of the matrices corresponds to one data point.
#
# The distance is first initialized with two data sets (here the training set
# is passed for both), which controls how the given data points are processed;
# the pairwise distance (dissimilarity ratio) matrix is then computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
#
# The method call 'init'* binds the given data sets, where a pairwise distance
# matrix between these two data sets is computed by 'get_distance_matrix'.
#
# The resulting distance matrix can be reaccessed by 'get_distance_matrix'.
#
# *Note that the previously computed distance matrix can no longer be
# reaccessed by 'get_distance_matrix'.
#
# For more details see doc/classshogun_1_1CCanberraMetric.html.
#
# Obviously, using the Canberra distance is not limited to this showcase
# example.
require 'modshogun'
require 'pp'
require 'load'
# Toy train/test matrices used by the Canberra example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# One argument list per run of distance_canberra_modular.
parameter_list = [
  [traindat, testdat],
  [traindat, testdat]
]
# Computes Canberra distance matrices on dense real-valued features.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
#
# Returns [distance, dm_train, dm_test]: the metric object (left initialised
# on train/test), the train-vs-train matrix and the train-vs-test matrix.
def distance_canberra_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                              fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'))
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  distance = Modshogun::CanberraMetric.new(feats_train, feats_train)
  dm_train = distance.get_distance_matrix

  # Re-initialising replaces the train/train matrix with train/test.
  distance.init(feats_train, feats_test)
  dm_test = distance.get_distance_matrix

  return distance, dm_train, dm_test
end
# Run the first parameter set when this file is executed as a script.
# The banner now spells the metric's class name correctly (was 'CanberaMetric').
if __FILE__ == $0
  puts 'CanberraMetric'
  pp distance_canberra_modular(*parameter_list[0])
end
# In this example the Histogram algorithm object computes a histogram over all
# 16bit unsigned integers in the features.
require 'modshogun'
require 'pp'
require 'load'
# DNA training strings used by the histogram example below.
traindna = LoadMatrix.load_dna('../data/fm_train_dna.dat')

# Argument lists (fm_dna, order, gap, reverse) for distribution_histogram_modular.
parameter_list = [
  [traindna, 3, 0, false],
  [traindna, 4, 0, false]
]
# Trains a Histogram distribution on word features derived from DNA strings.
#
# fm_dna::  DNA strings; loaded from '../data/fm_train_dna.dat' when omitted
# order::   word order passed to obtain_from_char (start is order - 1)
# gap::     gap passed to obtain_from_char
# reverse:: reverse flag passed to obtain_from_char
#
# Returns [histo, out_sample, out_likelihood].
def distribution_histogram_modular(fm_dna=LoadMatrix.load_dna('../data/fm_train_dna.dat'), order=3, gap=0, reverse=false)
  # The default loads the data itself: the top-level local `traindna` is not
  # visible inside a method, so it cannot be used as a default.
  charfeat = Modshogun::StringCharFeatures.new(Modshogun::DNA)
  charfeat.set_features(fm_dna)
  feats = Modshogun::StringWordFeatures.new(charfeat.get_alphabet)
  feats.obtain_from_char(charfeat, order - 1, order, gap, reverse)

  histo = Modshogun::Histogram.new
  histo.set_features(feats)
  histo.train
  histo.get_histogram

  # Only needed by the optional derivative loop below.
  num_examples = feats.get_num_vectors
  num_param = histo.get_num_model_parameters
  # num_examples.times do |i|
  #   num_param.times do |j|
  #     histo.get_log_derivative(j, i)
  #   end
  # end

  out_likelihood = histo.get_log_likelihood
  out_sample = histo.get_log_likelihood_sample
  return histo, out_sample, out_likelihood
end
###########################################################################
# call functions
###########################################################################
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Histogram'
  first_args = parameter_list[0]
  pp distribution_histogram_modular(*first_args)
end
# This example shows how to read and write plain ascii files, binary files and
# hdf5 datasets.
#
# For ascii files it shows how to obtain shogun's RealFeatures
# (a simple feature matrix of doubles with 1 column == 1 example, nr_columns ==
# number of examples) and also sparse features in SVM light format.
#
# Binary files use some custom native format and datasets can be read/written
# from/to hdf5 files with arbitrary group / path.
require 'load'
require 'modshogun'
require 'pp'
# Training matrix and two-class labels used by the I/O example below.
data  = LoadMatrix.load_numbers('../data/fm_train_real.dat')
label = LoadMatrix.load_numbers('../data/label_train_twoclass.dat')

# Single argument list for features_io_modular.
parameter_list = [
  [data, label]
]
# Round-trips sparse features, dense features and labels through Shogun's
# binary, LibSVM, CSV and HDF5 file classes.
#
# fm_train_real::        feature matrix written out in every format
# label_train_twoclass:: accepted but not read by this method
#
# Returns [feats, feats2, lab, lab2]: the dense features that were saved, the
# dense features loaded back, the labels saved and the labels loaded back.
def features_io_modular(fm_train_real, label_train_twoclass)
  # Each helper performs one save/load followed by closing the file.
  store = lambda do |obj, file|
    obj.save(file)
    file.close()
  end
  fetch = lambda do |obj, file|
    obj.load(file)
    file.close()
  end

  # --- sparse real features: binary + LibSVM ascii ---
  sparse_out = Modshogun::SparseRealFeatures.new
  sparse_out.set_full_feature_matrix(fm_train_real)
  sparse_in = Modshogun::SparseRealFeatures.new

  store.call(sparse_out, Modshogun::BinaryFile.new("fm_train_sparsereal.bin","w"))
  store.call(sparse_out, Modshogun::LibSVMFile.new("fm_train_sparsereal.ascii","w"))
  fetch.call(sparse_in, Modshogun::BinaryFile.new("fm_train_sparsereal.bin", "r"))
  fetch.call(sparse_in, Modshogun::LibSVMFile.new("fm_train_sparsereal.ascii"))

  # --- dense real features: binary, HDF5 and CSV ---
  dense_out = Modshogun::RealFeatures.new
  dense_out.set_feature_matrix(fm_train_real)
  dense_in = Modshogun::RealFeatures.new

  store.call(dense_out, Modshogun::BinaryFile.new("fm_train_real.bin","w"))
  store.call(dense_out, Modshogun::HDF5File.new("fm_train_real.h5","w", "/data/doubles"))
  store.call(dense_out, Modshogun::CSVFile.new("fm_train_real.ascii","w"))
  fetch.call(dense_in, Modshogun::BinaryFile.new("fm_train_real.bin"))
  fetch.call(dense_in, Modshogun::CSVFile.new("fm_train_real.ascii"))

  # --- labels: CSV, binary and HDF5 ---
  lab = Modshogun::MulticlassLabels.new([0.0,1.0,2.0,3.0])
  lab2 = Modshogun::MulticlassLabels.new

  store.call(lab, Modshogun::CSVFile.new("label_train_twoclass.ascii","w"))
  store.call(lab, Modshogun::BinaryFile.new("label_train_twoclass.bin","w"))
  store.call(lab, Modshogun::HDF5File.new("label_train_real.h5","w", "/data/labels"))
  fetch.call(lab2, Modshogun::CSVFile.new("label_train_twoclass.ascii"))
  fetch.call(lab2, Modshogun::BinaryFile.new("label_train_twoclass.bin"))

  # --- HDF5 read-back for both the dense features and the labels ---
  fetch.call(dense_in, Modshogun::HDF5File.new("fm_train_real.h5","r", "/data/doubles"))
  fetch.call(lab2, Modshogun::HDF5File.new("label_train_real.h5","r", "/data/labels"))

  # The files written above are left on disk; to clean up, unlink
  # fm_train_sparsereal.bin, fm_train_sparsereal.ascii, fm_train_real.bin,
  # fm_train_real.h5, fm_train_real.ascii, label_train_real.h5,
  # label_train_twoclass.ascii and label_train_twoclass.bin.
  return dense_out, dense_in, lab, lab2
end
# Run the only parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Features IO'
  first_args = parameter_list[0]
  pp features_io_modular(*first_args)
end
# This example demonstrates how to read and write data in the SVMLight Format
# from Shogun.
#
require 'modshogun'
require 'pp'
# Single argument list (input file name) for features_read_svmlight_format_modular.
parameter_list = [
  ['../data/train_sparsereal.light']
]
# Reads sparse features and labels from an SVMLight file and writes them back
# out to 'testwrite.light'.
#
# fname:: path of the SVMLight file to read
#
# Returns whatever save_with_labels returns. Both files are closed explicitly
# (as features_io_modular does) instead of being left open, and they are
# closed even when the load or save raises.
def features_read_svmlight_format_modular(fname)
  feats = Modshogun::SparseRealFeatures.new

  input = Modshogun::LibSVMFile.new(fname)
  begin
    labels = feats.load_with_labels(input)
  ensure
    input.close
  end

  output = Modshogun::LibSVMFile.new('testwrite.light', 'w')
  # The value of the begin body (not the ensure clause) is the method result.
  begin
    feats.save_with_labels(output, labels)
  ensure
    output.close
  end
end
# Run the only parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Reading SVMLIGHT format'
  first_args = parameter_list[0]
  pp features_read_svmlight_format_modular(*first_args)
end
# This example demonstrates how to encode ASCII-strings (255 symbols) in shogun.
require 'modshogun'
require 'pp'

strings=['hey','guys','i','am','a','string']
# The string list is the single argument of features_string_char_modular.
parameter_list=[strings]

# Builds raw-byte string features from the given strings and then replaces
# the first string with "test".
#
# strings:: array of Ruby strings
#
# Returns [f.get_features, f]: the stored strings after the replacement and
# the feature object itself.
def features_string_char_modular(strings)
  #create string features
  f=Modshogun::StringCharFeatures.new(strings, Modshogun::RAWBYTE)

  #and output several stats
  #puts "max string length", f.get_max_vector_length
  #puts "number of strings", f.get_num_vectors
  #puts "length of first string", f.get_vector_length(0)
  #puts "string[5]", f.get_feature_vector(5)
  #puts "strings", f.get_features

  #replace string 0
  f.set_feature_vector(['t','e','s','t'], 0)

  return f.get_features, f
end

if __FILE__ == $0
  puts 'StringCharFeatures'
  pp features_string_char_modular(*parameter_list)
end
# In this example the ANOVA kernel is being computed for toy data.
require 'modshogun'
require 'pp'
require 'load'
###########################################################################
# anova kernel
###########################################################################
# Toy train/test matrices used by the ANOVA example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# Argument lists (train, test, cardinality, size_cache) for kernel_anova_modular.
parameter_list = [
  [traindat, testdat, 2, 10],
  [traindat, testdat, 5, 10]
]
# Computes ANOVA kernel matrices and cross-checks the kernel's two recursive
# evaluation routines against each other on every pair of training vectors.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
# cardinality::   cardinality passed to the ANOVA kernel
# size_cache::    cache size passed to the ANOVA kernel
#
# Returns [km_train, km_test, kernel]. Any pair for which compute_rec1 and
# compute_rec2 differ by more than 1e-10 is printed as "|k1|k2|".
def kernel_anova_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                         fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                         cardinality=2, size_cache=10)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  kernel = Modshogun::ANOVAKernel.new(feats_train, feats_train, cardinality, size_cache)

  # Valid indices are 0...num_vectors; the previous inclusive range also
  # visited index num_vectors, one past the last vector.
  num_vectors = feats_train.get_num_vectors
  num_vectors.times do |i|
    num_vectors.times do |j|
      k1 = kernel.compute_rec1(i, j)
      k2 = kernel.compute_rec2(i, j)
      puts "|#{k1}|#{k2}|" if (k1 - k2).abs > 1e-10
    end
  end

  km_train = kernel.get_kernel_matrix
  kernel.init(feats_train, feats_test)
  km_test = kernel.get_kernel_matrix
  return km_train, km_test, kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'ANOVA'
  first_args = parameter_list[0]
  pp kernel_anova_modular(*first_args)
end
# This example demonstrates the use of the AUC Kernel, which
# can be used to maximize AUC instead of margin in SVMs.
require 'load'
require 'modshogun'
require 'pp'
# Training matrix for the AUC example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
# NOTE: despite its name, testdat holds the two-class training labels.
testdat  = LoadMatrix.load_labels('../data/label_train_twoclass.dat')

# Argument lists (features, labels, width) for kernel_auc_modular.
parameter_list = [
  [traindat, testdat, 1.7],
  [traindat, testdat, 1.6]
]
# Sets up an AUC kernel on top of a Gaussian sub-kernel and computes its
# kernel matrix.
#
# fm_train_real::    training feature matrix; loaded from
#                    '../data/fm_train_real.dat' when omitted
# label_train_real:: two-class training labels; loaded from
#                    '../data/label_train_twoclass.dat' when omitted
# width::            width passed to the Gaussian sub-kernel
#
# Returns the AUC kernel object.
def kernel_auc_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                       label_train_real=LoadMatrix.load_labels('../data/label_train_twoclass.dat'),
                       width=1.7)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)

  subkernel = Modshogun::GaussianKernel.new(feats_train, feats_train, width)

  kernel = Modshogun::AUCKernel.new(0, subkernel)
  kernel.setup_auc_maximization(Modshogun::BinaryLabels.new(label_train_real))

  # The matrix is computed as part of the demonstration; only the kernel
  # object is handed back to the caller.
  kernel.get_kernel_matrix
  return kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'AUC'
  first_args = parameter_list[0]
  pp kernel_auc_modular(*first_args)
end
# In this example the Cauchy kernel is being computed for toy data.
require 'modshogun'
require 'pp'
require 'load'
# Toy train/test matrices used by the Cauchy example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# Argument lists (train, test, sigma) for kernel_cauchy_modular.
parameter_list = [
  [traindat, testdat, 1.0],
  [traindat, testdat, 10.0]
]
# Computes Cauchy kernel matrices (based on a Euclidean distance) on dense
# real-valued features.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
# sigma::         sigma passed to the Cauchy kernel
#
# Returns [km_train, km_test, kernel].
def kernel_cauchy_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                          fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                          sigma=1.0)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  distance = Modshogun::EuclideanDistance.new(feats_train, feats_train)
  kernel = Modshogun::CauchyKernel.new(feats_train, feats_train, sigma, distance)
  km_train = kernel.get_kernel_matrix

  # Re-initialise on train/test before computing the second matrix.
  kernel.init(feats_train, feats_test)
  km_test = kernel.get_kernel_matrix

  return km_train, km_test, kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Cauchy'
  first_args = parameter_list[0]
  pp kernel_cauchy_modular(*first_args)
end
# This is an example for the initialization of the chi2-kernel on real data, where
# each column of the matrices corresponds to one training/test example.
require 'modshogun'
require 'pp'
require 'load'
###########################################################################
# chi2 kernel
###########################################################################
# Toy train/test matrices used by the chi2 example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# Argument lists (train, test, width, size_cache) for kernel_chi2_modular.
parameter_list = [
  [traindat, testdat, 1.4, 10],
  [traindat, testdat, 1.5, 10]
]
# Computes chi2 kernel matrices on dense real-valued features.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
# width::         width passed to the chi2 kernel
# size_cache::    cache size passed to the chi2 kernel
#
# Returns [km_train, km_test, kernel].
def kernel_chi2_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                        fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                        width=1.4, size_cache=10)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  kernel = Modshogun::Chi2Kernel.new(feats_train, feats_train, width, size_cache)
  km_train = kernel.get_kernel_matrix

  # Re-initialise on train/test before computing the second matrix.
  kernel.init(feats_train, feats_test)
  km_test = kernel.get_kernel_matrix

  return km_train, km_test, kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Chi2'
  first_args = parameter_list[0]
  pp kernel_chi2_modular(*first_args)
end
# In this example the circular kernel is being computed for toy data.
require 'modshogun'
require 'pp'
require 'load'
# Toy train/test matrices used by the circular-kernel example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# Argument lists (train, test, sigma) for kernel_circular_modular.
parameter_list = [
  [traindat, testdat, 1.0],
  [traindat, testdat, 5.0]
]
# Computes circular kernel matrices (based on a Euclidean distance) on dense
# real-valued features.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
# sigma::         sigma passed to the circular kernel
#
# Returns [km_train, km_test, kernel].
def kernel_circular_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                            fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                            sigma=1.0)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  distance = Modshogun::EuclideanDistance.new(feats_train, feats_train)
  kernel = Modshogun::CircularKernel.new(feats_train, feats_train, sigma, distance)
  km_train = kernel.get_kernel_matrix

  # Re-initialise on train/test before computing the second matrix.
  kernel.init(feats_train, feats_test)
  km_test = kernel.get_kernel_matrix

  return km_train, km_test, kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Circular'
  first_args = parameter_list[0]
  pp kernel_circular_modular(*first_args)
end
# In this example the combined kernel of custom kernel and poly kernel is being computed for toy data.
require 'modshogun'
require 'pp'
require 'load'
# Toy train/test matrices and two-class training labels for the example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')
label_traindat = LoadMatrix.load_labels('../data/label_train_twoclass.dat')

# Argument lists (train, test, labels) for kernel_combined_custom_poly_modular.
parameter_list = [
  [traindat, testdat, label_traindat],
  [traindat, testdat, label_traindat]
]
# Trains a LibSVM on a combined kernel (a custom kernel holding a precomputed
# degree-3 polynomial matrix plus a degree-2 polynomial kernel) and then
# applies it with an equivalent combined kernel built for train/test.
#
# fm_train_real::     training feature matrix; loaded from
#                     '../data/fm_train_real.dat' when omitted
# fm_test_real::      test feature matrix; loaded from
#                     '../data/fm_test_real.dat' when omitted
# fm_label_twoclass:: two-class training labels; loaded from
#                     '../data/label_train_twoclass.dat' when omitted
#
# Returns [km, kernel]: the matrix of the prediction-time combined kernel
# (initialised on train/test) and that kernel object.
def kernel_combined_custom_poly_modular(fm_train_real = LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                                        fm_test_real = LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                                        fm_label_twoclass = LoadMatrix.load_labels('../data/label_train_twoclass.dat'))
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.

  # --- training: custom (precomputed poly, degree 3) + poly (degree 2) ---
  kernel = Modshogun::CombinedKernel.new
  feats_train = Modshogun::CombinedFeatures.new

  tfeats = Modshogun::RealFeatures.new
  tfeats.set_feature_matrix(fm_train_real)
  tkernel = Modshogun::PolyKernel.new(10,3)
  tkernel.init(tfeats, tfeats)
  k = tkernel.get_kernel_matrix
  f = Modshogun::CustomKernel.new
  f.set_full_kernel_matrix_from_full(k)
  kernel.append_kernel(f)

  subkfeats_train = Modshogun::RealFeatures.new
  subkfeats_train.set_feature_matrix(fm_train_real)
  feats_train.append_feature_obj(subkfeats_train)
  subkernel = Modshogun::PolyKernel.new(10,2)
  kernel.append_kernel(subkernel)

  kernel.init(feats_train, feats_train)
  labels = Modshogun::BinaryLabels.new(fm_label_twoclass)
  svm = Modshogun::LibSVM.new(1.0, kernel, labels)
  svm.train

  # --- prediction: rebuild the same combination for train vs. test ---
  kernel = Modshogun::CombinedKernel.new
  feats_pred = Modshogun::CombinedFeatures.new

  pfeats = Modshogun::RealFeatures.new
  pfeats.set_feature_matrix(fm_test_real)
  tkernel = Modshogun::PolyKernel.new(10,3)
  tkernel.init(tfeats, pfeats)
  k = tkernel.get_kernel_matrix
  f = Modshogun::CustomKernel.new
  f.set_full_kernel_matrix_from_full(k)
  kernel.append_kernel(f)

  subkfeats_test = Modshogun::RealFeatures.new
  subkfeats_test.set_feature_matrix(fm_test_real)
  feats_pred.append_feature_obj(subkfeats_test)
  subkernel = Modshogun::PolyKernel.new(10, 2)
  kernel.append_kernel(subkernel)

  kernel.init(feats_train, feats_pred)
  svm.set_kernel(kernel)
  svm.apply

  # At this point the kernel is initialised on train/test, so this is the
  # train-vs-test matrix.
  km = kernel.get_kernel_matrix
  return km, kernel
end
# Run the first parameter set when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'Combined Custom Poly Modular'
  first_args = parameter_list[0]
  pp kernel_combined_custom_poly_modular(*first_args)
end
# The well known Gaussian kernel (swiss army knife for SVMs) on dense real valued features.
require 'modshogun'
require 'load'
require 'pp'
# Toy train/test matrices used by the Gaussian example below.
traindat = LoadMatrix.load_numbers('../data/fm_train_real.dat')
testdat  = LoadMatrix.load_numbers('../data/fm_test_real.dat')

# Argument lists (train, test, width) for kernel_gaussian_modular.
parameter_list = [
  [traindat, testdat, 1.3],
  [traindat, testdat, 1.4]
]
# Computes Gaussian kernel matrices on dense real-valued features, printing
# the training input and the train/train matrix along the way.
#
# fm_train_real:: training feature matrix; loaded from
#                 '../data/fm_train_real.dat' when omitted
# fm_test_real::  test feature matrix; loaded from
#                 '../data/fm_test_real.dat' when omitted
# width::         width passed to the Gaussian kernel
#
# Returns [km_train, km_test, kernel].
def kernel_gaussian_modular(fm_train_real=LoadMatrix.load_numbers('../data/fm_train_real.dat'),
                            fm_test_real=LoadMatrix.load_numbers('../data/fm_test_real.dat'),
                            width=1.3)
  # The defaults load the toy data themselves: top-level locals such as
  # `traindat` are not visible inside a method, so they cannot be defaults.
  pp fm_train_real

  feats_train = Modshogun::RealFeatures.new
  feats_train.set_feature_matrix(fm_train_real)
  feats_test = Modshogun::RealFeatures.new
  feats_test.set_feature_matrix(fm_test_real)

  kernel = Modshogun::GaussianKernel.new(feats_train, feats_train, width)
  km_train = kernel.get_kernel_matrix

  # Re-initialise on train/test before computing the second matrix.
  kernel.init(feats_train, feats_test)
  km_test = kernel.get_kernel_matrix

  pp km_train
  return km_train, km_test, kernel
end
# Run the first parameter set when this file is executed as a script.
# The result is not printed here; the function prints its own output.
if $PROGRAM_NAME == __FILE__
  puts 'Gaussian'
  first_args = parameter_list[0]
  kernel_gaussian_modular(*first_args)
end
# In this example a complex model parameters selection tree
# is being constructed
require 'modshogun'
require 'load'
require 'pp'
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# Written (W) 2011 Heiko Strathmann
# Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
# Trancekoded (T) 2011 Justin Patera aka serialhex
# Builds a model-selection parameter tree: a root with a 'C' range and three
# alternative 'kernel' sub-trees (power, Gaussian, distant-segments), then
# asks the root for its parameter combinations.
#
# Takes no arguments and returns nil; the tree is built only for demonstration.
def modelselection_parameter_tree_modular()
  new_node = lambda { |*spec| Modshogun::ModelSelectionParameters.new(*spec) }

  tree = new_node.call
  # Query the combinations of the still-empty tree.
  tree.get_combinations().get_num_elements()

  # Regularisation constant C.
  reg_const = new_node.call('C')
  tree.append_child(reg_const)
  reg_const.build_values(1, 11, Modshogun::R_EXP)

  # Power kernel: 'degree' plus a Minkowski 'distance' with its own 'k'.
  power_node = new_node.call('kernel', Modshogun::PowerKernel.new)
  tree.append_child(power_node)

  degree = new_node.call('degree')
  degree.build_values(1, 1, Modshogun::R_EXP)
  power_node.append_child(degree)

  metric_node = new_node.call('distance', Modshogun::MinkowskiMetric.new(10))
  power_node.append_child(metric_node)

  metric_k = new_node.call('k')
  metric_k.build_values(1, 12, Modshogun::R_LINEAR)
  metric_node.append_child(metric_k)

  # Gaussian kernel: 'log_width'.
  gauss_node = new_node.call('kernel', Modshogun::GaussianKernel.new)
  tree.append_child(gauss_node)

  log_width = new_node.call('log_width')
  log_width.build_values(0.0, 0.5*Math.log(2.0), Modshogun::R_LINEAR)
  gauss_node.append_child(log_width)

  # Distant-segments kernel: 'delta' and 'theta'.
  ds_node = new_node.call('kernel', Modshogun::DistantSegmentsKernel.new)
  tree.append_child(ds_node)

  delta = new_node.call('delta')
  delta.build_values(1, 2, Modshogun::R_EXP)
  ds_node.append_child(delta)

  theta = new_node.call('theta')
  theta.build_values(1, 2, Modshogun::R_EXP)
  ds_node.append_child(theta)

  #tree.print_tree()
  tree.get_combinations()
  # To inspect every combination:
  #combinations.get_num_elements.times do |i|
  #  combinations.get_element(i).print_tree()
  #end
  nil
end
# Build the tree when this file is executed as a script.
if $PROGRAM_NAME == __FILE__
  puts 'ParameterTree'
  outcome = modelselection_parameter_tree_modular()
  pp outcome
end