|
SHOGUN
4.2.0
|
This page lists ready to run shogun examples for the C++ libshogun interface.
To run the examples you will need to manually compile them via
g++ name_of_example.cpp -lshogun
in case you installed libshogun to a nonstandard directory you will need to specify the appropriate library and include paths, e.g.
g++ -I/path/to/libshogun/includes name_of_example.cpp -L/path/to/libshogun/sofile -lshogun
Then the examples are standard binary executables and can be started via
./name_of_example
respectively if the libraries are in nonstandard locations (such that they cannot be found by the dynamic linker)
LD_LIBRARY_PATH=path/to/libshogun ./name_of_example
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of the Vowpal Wabbit learning algorithm.
*/
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/multiclass/tree/BalancedConditionalProbabilityTree.h>
using namespace shogun;
int main(int argc, char **argv)
{
init_shogun_with_defaults();
const char* train_file_name = "../data/7class_example4_train.dense";
const char* test_file_name = "../data/7class_example4_test.dense";
CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
SG_REF(train_file);
CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
SG_REF(train_features);
CBalancedConditionalProbabilityTree *cpt = new CBalancedConditionalProbabilityTree();
cpt->set_num_passes(1);
cpt->set_features(train_features);
if (argc > 1)
{
float64_t alpha = 0.5;
sscanf(argv[1], "%lf", &alpha);
SG_SPRINT("Setting alpha to %.2lf\n", alpha);
cpt->set_alpha(alpha);
}
cpt->train();
cpt->print_tree();
CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
SG_REF(test_features);
CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
test_features->reset_stream();
SG_SPRINT("num_labels = %d\n", pred->get_num_labels());
SG_UNREF(test_features);
SG_UNREF(test_file);
test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
SG_REF(test_features);
CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
SG_REF(gnd);
test_features->start_parser();
for (int32_t i=0; i < pred->get_num_labels(); ++i)
{
test_features->get_next_example();
gnd->set_int_label(i, test_features->get_label());
test_features->release_example();
}
test_features->end_parser();
int32_t n_correct = 0;
for (index_t i=0; i < pred->get_num_labels(); ++i)
{
if (pred->get_int_label(i) == gnd->get_int_label(i))
n_correct++;
//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
}
SG_SPRINT("\n");
SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());
SG_UNREF(gnd);
SG_UNREF(train_features);
SG_UNREF(test_features);
SG_UNREF(train_file);
SG_UNREF(test_file);
SG_UNREF(cpt);
SG_UNREF(pred);
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Viktor Gal
*/
#include <shogun/base/init.h>
#include <shogun/machine/BaggingMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/ensemble/MajorityVote.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
float64_t difference = 2.5;
index_t dim = 2;
index_t num_neg = 20;
index_t num_pos = 20;
int32_t num_bags = 5;
int32_t bag_size = 25;
/* streaming data generator for mean shift distributions */
CMeanShiftDataGenerator* gen_n = new CMeanShiftDataGenerator(0, dim);
CMeanShiftDataGenerator* gen_p = new CMeanShiftDataGenerator(difference, dim);
CFeatures* neg = gen_n->get_streamed_features(num_pos);
CFeatures* pos = gen_p->get_streamed_features(num_neg);
CDenseFeatures<float64_t>* train_feats =
CDenseFeatures<float64_t>::obtain_from_generic(neg->create_merged_copy(pos));
SGVector<float64_t> tl(num_neg+num_pos);
tl.set_const(1);
for (index_t i = 0; i < num_neg; ++i)
tl[i] = -1;
CBinaryLabels* train_labels = new CBinaryLabels(tl);
CBaggingMachine* bm = new CBaggingMachine(train_feats, train_labels);
CLibLinear* ll = new CLibLinear();
ll->set_bias_enabled(true);
CMajorityVote* mv = new CMajorityVote();
bm->set_num_bags(num_bags);
bm->set_bag_size(bag_size);
bm->set_machine(ll);
bm->set_combination_rule(mv);
bm->train();
CBinaryLabels* pred_bagging = bm->apply_binary(train_feats);
CContingencyTableEvaluation* eval = new CContingencyTableEvaluation();
pred_bagging->get_int_labels().display_vector();
float64_t bag_accuracy = eval->evaluate(pred_bagging, train_labels);
float64_t oob_error = bm->get_oob_error(eval);
CLibLinear* libLin = new CLibLinear(2.0, train_feats, train_labels);
libLin->set_bias_enabled(true);
libLin->train();
CBinaryLabels* pred_liblin = libLin->apply_binary(train_feats);
pred_liblin->get_int_labels().display_vector();
float64_t liblin_accuracy = eval->evaluate(pred_liblin, train_labels);
SG_SPRINT("bagging accuracy: %f (OOB-error: %f)\nLibLinear accuracy: %f\n",
bag_accuracy, oob_error, liblin_accuracy);
SG_UNREF(bm);
SG_UNREF(pos);
SG_UNREF(neg);
SG_UNREF(eval);
exit_shogun();
return 0;
}
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/IndexBlock.h>
#include <shogun/lib/IndexBlockTree.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// create some data
SGMatrix<float64_t> matrix(4,4);
for (int32_t i=0; i<4*4; i++)
matrix.matrix[i]=i;
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
// create three labels
CBinaryLabels* labels=new CBinaryLabels(4);
labels->set_label(0, -1);
labels->set_label(1, +1);
labels->set_label(2, -1);
labels->set_label(3, +1);
CIndexBlock* first_block = new CIndexBlock(0,2);
CIndexBlock* second_block = new CIndexBlock(2,4);
CIndexBlockGroup* block_group = new CIndexBlockGroup();
block_group->add_block(first_block);
block_group->add_block(second_block);
CFeatureBlockLogisticRegression* regressor = new CFeatureBlockLogisticRegression(0.5,features,labels,block_group);
regressor->train();
regressor->get_w().display_vector();
CIndexBlock* root_block = new CIndexBlock(0,4);
root_block->add_sub_block(first_block);
root_block->add_sub_block(second_block);
CIndexBlockTree* block_tree = new CIndexBlockTree(root_block);
regressor->set_feature_relation(block_tree);
regressor->train();
regressor->get_w().display_vector();
SG_UNREF(regressor);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Roman Votyakov
*/
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/SingleLaplacianInferenceMethod.h>
#include <shogun/machine/gp/EPInferenceMethod.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/LogitLikelihood.h>
#include <shogun/machine/gp/ProbitLikelihood.h>
#include <shogun/classifier/GaussianProcessClassification.h>
#include <shogun/io/CSVFile.h>
using namespace shogun;
// files with training data
const char* fname_feat_train="../data/fm_train_real.dat";
const char* fname_label_train="../data/label_train_twoclass.dat";
// file with testing data
const char* fname_feat_test="../data/fm_test_real.dat";
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// trainig data
SGMatrix<float64_t> X_train;
SGVector<float64_t> y_train;
// load training features from file
CCSVFile* file_feat_train=new CCSVFile(fname_feat_train);
X_train.load(file_feat_train);
SG_UNREF(file_feat_train);
// load training labels from file
CCSVFile* file_label_train=new CCSVFile(fname_label_train);
y_train.load(file_label_train);
SG_UNREF(file_label_train);
// testing features
SGMatrix<float64_t> X_test;
// load testing features from file
CCSVFile* file_feat_test=new CCSVFile(fname_feat_test);
X_test.load(file_feat_test);
SG_UNREF(file_feat_test);
// convert training and testing data into shogun representation
CDenseFeatures<float64_t>* feat_train=new CDenseFeatures<float64_t>(X_train);
CBinaryLabels* lab_train=new CBinaryLabels(y_train);
CDenseFeatures<float64_t>* feat_test=new CDenseFeatures<float64_t>(X_test);
SG_REF(feat_test);
// create Gaussian kernel with width = 2.0
CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);
// create zero mean function
CZeroMean* mean=new CZeroMean();
// you can easily switch between probit and logit likelihood models
// by uncommenting/commenting the following lines:
// create probit likelihood model
// CProbitLikelihood* lik=new CProbitLikelihood();
// create logit likelihood model
CLogitLikelihood* lik=new CLogitLikelihood();
// you can easily switch between SingleLaplace and EP approximation by
// uncommenting/commenting the following lines:
// specify SingleLaplace approximation inference method
// CSingleLaplacianInferenceMethod* inf=new CSingleLaplacianInferenceMethod(kernel,
// feat_train, mean, lab_train, lik);
// specify EP approximation inference method
CEPInferenceMethod* inf=new CEPInferenceMethod(kernel, feat_train, mean,
lab_train, lik);
// create and train GP classifier, which uses SingleLaplace approximation
CGaussianProcessClassification* gpc=new CGaussianProcessClassification(inf);
gpc->train();
// apply binary classification to the test data and get -1/+1
// labels of the predictions
CBinaryLabels* predictions=gpc->apply_binary(feat_test);
predictions->get_labels().display_vector("predictions");
// get probabilities p(y*=1|x*) for each testing feature x*
SGVector<float64_t> p_test=gpc->get_probabilities(feat_test);
p_test.display_vector("predictive probability");
// get predictive mean
SGVector<float64_t> mu_test=gpc->get_mean_vector(feat_test);
mu_test.display_vector("predictive mean");
// get predictive variance
SGVector<float64_t> s2_test=gpc->get_variance_vector(feat_test);
s2_test.display_vector("predictive variance");
// free up memory
SG_UNREF(gpc);
SG_UNREF(predictions);
SG_UNREF(feat_test);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann and others
*/
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/LaRank.h>
#include <shogun/base/init.h>
using namespace shogun;
void test()
{
index_t num_vec=10;
index_t num_feat=3;
index_t num_class=num_feat; // to make data easy
float64_t distance=15;
// create some linearly seperable data
SGMatrix<float64_t> matrix(num_class, num_vec);
SGMatrix<float64_t> matrix_test(num_class, num_vec);
CMulticlassLabels* labels=new CMulticlassLabels(num_vec);
CMulticlassLabels* labels_test=new CMulticlassLabels(num_vec);
for (index_t i=0; i<num_vec; ++i)
{
index_t label=i%num_class;
for (index_t j=0; j<num_feat; ++j)
{
matrix(j,i)=CMath::randn_double();
matrix_test(j,i)=CMath::randn_double();
labels->set_label(i, label);
labels_test->set_label(i, label);
}
/* make sure data is linearly seperable per class */
matrix(label,i)+=distance;
matrix_test(label,i)+=distance;
}
matrix.display_matrix("matrix");
labels->get_int_labels().display_vector("labels");
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
CDenseFeatures<float64_t>* features_test=
new CDenseFeatures<float64_t>(matrix_test);
// create three labels
for (index_t i=0; i<num_vec; ++i)
labels->set_label(i, i%num_class);
// create gaussian kernel with cache 10MB, width 0.5
CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
kernel->init(features, features);
// create libsvm with C=10 and train
CLaRank* svm = new CLaRank(10, kernel, labels);
svm->train();
svm->train();
// classify on training examples
CMulticlassLabels* output=(CMulticlassLabels*)svm->apply();
output->get_labels().display_vector("batch output");
/* assert that batch apply and apply(index_t) give same result */
SGVector<float64_t> single_outputs(output->get_num_labels());
for (index_t i=0; i<output->get_num_labels(); ++i)
single_outputs[i]=svm->apply_one(i);
single_outputs.display_vector("single_outputs");
for (index_t i=0; i<output->get_num_labels(); ++i)
ASSERT(output->get_label(i)==single_outputs[i]);
CMulticlassLabels* output_test=
(CMulticlassLabels*)svm->apply(features_test);
labels_test->get_labels().display_vector("labels_test");
output_test->get_labels().display_vector("output_test");
for (index_t i=0; i<output->get_num_labels(); ++i)
ASSERT(labels_test->get_label(i)==output_test->get_label(i));
// free up memory
SG_UNREF(output);
SG_UNREF(labels_test);
SG_UNREF(output_test);
SG_UNREF(svm);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/labels/LatentLabels.h>
#include <shogun/features/LatentFeatures.h>
#include <shogun/latent/LatentSVM.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <libgen.h>
using namespace shogun;
#define MAX_LINE_LENGTH 4096
#define HOG_SIZE 1488
struct CBoundingBox : public CData
{
CBoundingBox(int32_t x, int32_t y) : CData(), x_pos(x), y_pos(y) {};
int32_t x_pos, y_pos;
/** @return name of SGSerializable */
virtual const char* get_name() const { return "BoundingBox"; }
};
struct CHOGFeatures : public CData
{
CHOGFeatures(int32_t w, int32_t h) : CData(), width(w), height(h) {};
int32_t width, height;
float64_t ***hog;
/** @return name of SGSerializable */
virtual const char* get_name() const { return "HOGFeatures"; }
};
class CObjectDetector: public CLatentModel
{
public:
CObjectDetector() {}
CObjectDetector(CLatentFeatures* feat, CLatentLabels* labels) : CLatentModel(feat, labels) {}
virtual ~CObjectDetector() {}
virtual int32_t get_dim() const { return HOG_SIZE; }
virtual CDotFeatures* get_psi_feature_vectors()
{
int32_t num_examples = this->get_num_vectors();
int32_t dim = this->get_dim();
SGMatrix<float64_t> psi_m(dim, num_examples);
for (int32_t i = 0; i < num_examples; ++i)
{
CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(i);
CBoundingBox* bb = (CBoundingBox*) m_labels->get_latent_label(i);
memcpy(psi_m.matrix+i*dim, hf->hog[bb->x_pos][bb->y_pos], dim*sizeof(float64_t));
}
CDenseFeatures<float64_t>* psi_feats = new CDenseFeatures<float64_t>(psi_m);
return psi_feats;
}
virtual CData* infer_latent_variable(const SGVector<float64_t>& w, index_t idx)
{
int32_t pos_x = 0, pos_y = 0;
float64_t max_score = -CMath::INFTY;
CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(idx);
for (int i = 0; i < hf->width; ++i)
{
for (int j = 0; j < hf->height; ++j)
{
float64_t score = CMath::dot(w.vector, hf->hog[i][j], w.vlen);
if (score > max_score)
{
pos_x = i;
pos_y = j;
max_score = score;
}
}
}
SG_SDEBUG("%d %d %f\n", pos_x, pos_y, max_score);
CBoundingBox* h = new CBoundingBox(pos_x, pos_y);
SG_REF(h);
return h;
}
};
static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& labels)
{
FILE* fd = fopen(fname, "r");
char line[MAX_LINE_LENGTH];
char *pchar, *last_pchar;
int num_examples,label,height,width;
char* path = dirname(fname);
if (fd == NULL)
SG_SERROR("Cannot open input file %s!\n", fname);
fgets(line, MAX_LINE_LENGTH, fd);
num_examples = atoi(line);
labels = new CLatentLabels(num_examples);
SG_REF(labels);
CBinaryLabels* ys = new CBinaryLabels(num_examples);
feats = new CLatentFeatures(num_examples);
SG_REF(feats);
CMath::init_random();
for (int i = 0; (!feof(fd)) && (i < num_examples); ++i)
{
fgets(line, MAX_LINE_LENGTH, fd);
pchar = line;
while ((*pchar)!=' ') pchar++;
*pchar = '\0';
pchar++;
/* label: {-1, 1} */
last_pchar = pchar;
while ((*pchar)!=' ') pchar++;
*pchar = '\0';
label = (atoi(last_pchar) % 2 == 0) ? 1 : -1;
pchar++;
if (ys->set_label(i, label) == false)
SG_SERROR("Couldn't set label for element %d\n", i);
last_pchar = pchar;
while ((*pchar)!=' ') pchar++;
*pchar = '\0';
width = atoi(last_pchar);
pchar++;
last_pchar = pchar;
while ((*pchar)!='\n') pchar++;
*pchar = '\0';
height = atoi(last_pchar);
/* create latent label */
int x = CMath::random(0, width-1);
int y = CMath::random(0, height-1);
CBoundingBox* bb = new CBoundingBox(x,y);
labels->add_latent_label(bb);
SG_SPROGRESS(i, 0, num_examples);
CHOGFeatures* hog = new CHOGFeatures(width, height);
hog->hog = SG_CALLOC(float64_t**, hog->width);
for (int j = 0; j < width; ++j)
{
hog->hog[j] = SG_CALLOC(float64_t*, hog->height);
for (int k = 0; k < height; ++k)
{
char filename[MAX_LINE_LENGTH];
hog->hog[j][k] = SG_CALLOC(float64_t, HOG_SIZE);
sprintf(filename,"%s/%s.%03d.%03d.txt",path,line,j,k);
FILE* f = fopen(filename, "r");
if (f == NULL)
SG_SERROR("Could not open file: %s\n", filename);
for (int l = 0; l < HOG_SIZE; ++l)
fscanf(f,"%lf",&hog->hog[j][k][l]);
fclose(f);
}
}
feats->add_sample(hog);
}
fclose(fd);
labels->set_labels(ys);
SG_SDONE();
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
/* check whether the train/test args are given */
if (argc < 3)
{
SG_SERROR("not enough arguements given\n");
}
CLatentFeatures* train_feats = NULL;
CLatentLabels* train_labels = NULL;
/* read train data set */
read_dataset(argv[1], train_feats, train_labels);
/* train the classifier */
float64_t C = 10.0;
CObjectDetector* od = new CObjectDetector(train_feats, train_labels);
CLatentSVM llm(od, C);
llm.train();
// CLatentFeatures* test_feats = NULL;
// CLatentLabels* test_labels = NULL;
// read_dataset(argv[2], test_feats, test_labels);
SG_SPRINT("Testing with the test set\n");
llm.apply(train_feats);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Kevin Hughes
* Copyright (C) 2013 Kevin Hughes
*
* Thanks to Fernando Jose Iglesias Garcia (shogun)
* and Matthieu Perrot (scikit-learn)
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/MCLDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/features/DataGenerator.h>
using namespace shogun;
#define NUM 50
#define DIMS 2
#define CLASSES 2
void test()
{
#ifdef HAVE_LAPACK
SGVector< float64_t > lab(CLASSES*NUM);
SGMatrix< float64_t > feat(DIMS, CLASSES*NUM);
feat = CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS);
for( int i = 0 ; i < CLASSES ; ++i )
for( int j = 0 ; j < NUM ; ++j )
lab[i*NUM+j] = double(i);
// Create train labels
CMulticlassLabels* labels = new CMulticlassLabels(lab);
// Create train features
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feat);
// Create QDA classifier
CMCLDA* lda = new CMCLDA(features, labels);
SG_REF(lda);
lda->train();
// Classify and display output
CMulticlassLabels* output=CLabelsFactory::to_multiclass(lda->apply());
SG_REF(output);
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels());
// Free memory
SG_UNREF(output);
SG_UNREF(lda);
#endif
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <iostream>
using namespace shogun;
//generates data points (of different classes) randomly
void gen_rand_data(SGMatrix<float64_t> features, SGVector<float64_t> labels, float64_t distance)
{
index_t num_samples=labels.vlen;
index_t dimensions=features.num_rows;
for (int32_t i=0; i<num_samples; i++)
{
if (i<num_samples/2)
{
labels[i]=-1.0;
for(int32_t j=0; j<dimensions; j++)
features(j,i)=CMath::random(0.0,1.0)+distance;
}
else
{
labels[i]=1.0;
for(int32_t j=0; j<dimensions; j++)
features(j,i)=CMath::random(0.0,1.0)-distance;
}
}
labels.display_vector("labels");
std::cout<<std::endl;
features.display_matrix("features");
std::cout<<std::endl;
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
const float64_t svm_C=10;
index_t num_samples=20;
index_t dimensions=2;
float64_t dist=0.5;
SGMatrix<float64_t> featureMatrix(dimensions,num_samples);
SGVector<float64_t> labelVector(num_samples);
//random generation of data
gen_rand_data(featureMatrix,labelVector,dist);
//create train labels
CLabels* labels=new CBinaryLabels(labelVector);
//create train features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
SG_REF(features);
features->set_feature_matrix(featureMatrix);
//create linear kernel
CLinearKernel* kernel=new CLinearKernel();
SG_REF(kernel);
kernel->init(features, features);
//create svm classifier by LibSVM
CLibSVM* svm=new CLibSVM(svm_C,kernel, labels);
SG_REF(svm);
svm->train();
//classify data points
CBinaryLabels* out_labels=CLabelsFactory::to_binary(svm->apply());
/*convert scores to calibrated probabilities by fitting a sigmoid function
using the method described in Lin, H., Lin, C., and Weng, R. (2007). A note
on Platt's probabilistic outputs for support vector machines.
See BinaryLabels documentation for details*/
out_labels->scores_to_probabilities();
//display output labels and probabilities
for (int32_t i=0; i<num_samples; i++)
{
SG_SPRINT("out[%d]=%f (%f)\n", i, out_labels->get_label(i),
out_labels->get_value(i));
}
//clean up
SG_UNREF(out_labels);
SG_UNREF(kernel);
SG_UNREF(features);
SG_UNREF(svm);
exit_shogun();
return 0;
}
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message);
// create some data
SGMatrix<float64_t> matrix(2,3);
for (int32_t i=0; i<6; i++)
matrix.matrix[i]=i;
// create three 2-dimensional vectors
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>();
features->set_feature_matrix(matrix);
// create three labels
CBinaryLabels* labels=new CBinaryLabels(3);
labels->set_label(0, -1);
labels->set_label(1, +1);
labels->set_label(2, -1);
// create gaussian kernel with cache 10MB, width 0.5
CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
kernel->init(features, features);
// create libsvm with C=10 and train
CLibSVM* svm = new CLibSVM(10, kernel, labels);
svm->train();
// classify on training examples
for (int32_t i=0; i<3; i++)
SG_SPRINT("output[%d]=%f\n", i, svm->apply_one(i));
// free up memory
SG_UNREF(svm);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/DenseLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
using namespace shogun;
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
root->append_child(c1);
c1->build_values(-1.0, 1.0, R_EXP);
CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
root->append_child(c2);
c2->build_values(-1.0, 1.0, R_EXP);
CCombinedKernel* kernel1=new CCombinedKernel();
kernel1->append_kernel(new CGaussianKernel(10, 2));
kernel1->append_kernel(new CGaussianKernel(10, 3));
kernel1->append_kernel(new CGaussianKernel(10, 4));
CModelSelectionParameters* param_kernel1=new CModelSelectionParameters(
"kernel", kernel1);
root->append_child(param_kernel1);
CCombinedKernel* kernel2=new CCombinedKernel();
kernel2->append_kernel(new CGaussianKernel(10, 20));
kernel2->append_kernel(new CGaussianKernel(10, 30));
kernel2->append_kernel(new CGaussianKernel(10, 40));
CModelSelectionParameters* param_kernel2=new CModelSelectionParameters(
"kernel", kernel2);
root->append_child(param_kernel2);
return root;
}
/** Demonstrates the MKL modelselection bug with SVMLight. See comments how to reproduce */
void test()
{
int32_t num_subsets=3;
int32_t num_vectors=20;
int32_t dim_vectors=3;
/* create some data and labels */
SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
CBinaryLabels* labels=new CBinaryLabels(num_vectors);
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
matrix.matrix[i]=CMath::randn_double();
/* create num_feautres 2-dimensional vectors */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
features->set_feature_matrix(matrix);
/* create combined features */
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);
/* works */
CMKLClassification* classifier=new CMKLClassification(new CLibSVM());
classifier->set_interleaved_optimization_enabled(false);
/* the above plus this does not work (interleaved only with SVMLight)*/
// classifier->set_interleaved_optimization_enabled(true);
/* However, SVMLight does not work */
// CMKLClassification* classifier=new CMKLClassification(new CSVMLight());
// /* any of those */
// classifier->set_interleaved_optimization_enabled(false);
// classifier->set_interleaved_optimization_enabled(true);
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, comb_features,
labels, splitting_strategy, evaluation_criterium);
cross->set_num_runs(1);
/* TODO: remove this once locking is fixed for combined kernels */
cross->set_autolock(false);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
classifier->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
bool print_state=true;
CParameterCombination* best_combination=grid_search->select_model(
print_state);
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(classifier);
/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.01);
CEvaluationResult* result=cross->evaluate();
SG_SPRINT("result: ");
result->print_result();
/* clean up destroy result parameter */
SG_UNREF(best_combination);
SG_UNREF(grid_search);
SG_UNREF(result);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2009 Alexander Binder
* Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <iostream>
#include <shogun/io/SGIO.h>
#include <shogun/lib/ShogunException.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/classifier/mkl/MKLMulticlass.h>
// g++ -Wall -O3 classifier_mklmulticlass.cpp -I /home/theseus/private/alx/shoguntrunk/compiledtmp/include -L/home/theseus/private/alx/shoguntrunk/compiledtmp/lib -lshogun
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void print_warning(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void print_error(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void getgauss(float64_t & y1, float64_t & y2)
{
float x1, x2, w;
do {
x1 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0;
x2 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0;
w = x1 * x1 + x2 * x2;
} while ( (w >= 1.0)|| (w<1e-9) );
w = sqrt( (-2.0 * log( w ) ) / w );
y1 = x1 * w;
y2 = x2 * w;
}
void gendata(std::vector<float64_t> & x,std::vector<float64_t> & y,
CMulticlassLabels*& lab)
{
int32_t totalsize=240;
int32_t class1size=80;
int32_t class2size=70;
//generating three class data set
x.resize(totalsize);
y.resize(totalsize);
for(size_t i=0; i< x.size();++i)
getgauss(x[i], y[i]);
for(size_t i=0; i< x.size();++i)
{
if((int32_t)i < class1size)
{
x[i]+=0;
y[i]+=0;
}
else if( (int32_t)i< class1size+class2size)
{
x[i]+=+1;
y[i]+=-1;
}
else
{
x[i]+=-1;
y[i]+=+1;
}
}
//set labels
lab=new CMulticlassLabels(x.size());
for(size_t i=0; i< x.size();++i)
{
if((int32_t)i < class1size)
lab->set_int_label(i,0);
else if( (int32_t)i< class1size+class2size)
lab->set_int_label(i,1);
else
lab->set_int_label(i,2);
}
}
void gentrainkernel(float64_t * & ker1 ,float64_t * & ker2, float64_t * & ker3 ,float64_t &
autosigma,float64_t & n1,float64_t & n2, float64_t & n3,
const std::vector<float64_t> & x,
const std::vector<float64_t> & y)
{
autosigma=0;
for(size_t l=0; l< x.size();++l)
{
for(size_t r=0; r<= l;++r)
{
float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r]));
autosigma+=dist*2.0/(float64_t)x.size()/((float64_t)x.size()+1);
}
}
float64_t fm1=0, mean1=0,fm2=0, mean2=0,fm3=0, mean3=0;
ker1=SG_MALLOC(float64_t, x.size()*x.size());
ker2=SG_MALLOC(float64_t, x.size()*x.size());
ker3=SG_MALLOC(float64_t, x.size()*x.size());
for(size_t l=0; l< x.size();++l)
{
for(size_t r=0; r< x.size();++r)
{
float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r]));
ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ;
//ker2[l +r*x.size()]= exp( -dist/sigma2/sigma2) ;
ker2[l +r*x.size()]= x[l]*x[r] + y[l]*y[r];
ker3[l +r*x.size()]= (x[l]*x[r] + y[l]*y[r]+1)*(x[l]*x[r] + y[l]*y[r]+1);
fm1+=ker1[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());
fm2+=ker2[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());
fm3+=ker3[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());
if(l==r)
{
mean1+=ker1[l +r*x.size()]/(float64_t)x.size();
mean2+=ker2[l +r*x.size()]/(float64_t)x.size();
mean3+=ker3[l +r*x.size()]/(float64_t)x.size();
}
}
}
n1=(mean1-fm1);
n2=(mean2-fm2);
n3=(mean3-fm3);
for(size_t l=0; l< x.size();++l)
{
for(size_t r=0; r< x.size();++r)
{
ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1;
ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2;
ker3[l +r*x.size()]=ker3[l +r*x.size()]/n3;
}
}
}
void gentestkernel(float64_t * & ker1 ,float64_t * & ker2,float64_t * & ker3,
const float64_t autosigma,const float64_t n1,const float64_t n2, const float64_t n3,
const std::vector<float64_t> & x,const std::vector<float64_t> & y,
const std::vector<float64_t> & tx,const std::vector<float64_t> & ty)
{
ker1=SG_MALLOC(float64_t, x.size()*tx.size());
ker2=SG_MALLOC(float64_t, x.size()*tx.size());
ker3=SG_MALLOC(float64_t, x.size()*tx.size());
for(size_t l=0; l< x.size();++l)
{
for(size_t r=0; r< tx.size();++r)
{
float64_t dist=((x[l]-tx[r])*(x[l]-tx[r]) + (y[l]-ty[r])*(y[l]-ty[r]));
ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ;
ker2[l +r*x.size()]= x[l]*tx[r] + y[l]*ty[r];
ker3[l +r*x.size()]= (x[l]*tx[r] + y[l]*ty[r]+1)*(x[l]*tx[r] + y[l]*ty[r]+1);
}
}
for(size_t l=0; l< x.size();++l)
{
for(size_t r=0; r< tx.size();++r)
{
ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1;
ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2;
ker3[l +r*x.size()]=ker3[l +r*x.size()]/n2;
}
}
}
void tester()
{
CMulticlassLabels* lab=NULL;
std::vector<float64_t> x,y;
gendata(x,y, lab);
SG_REF(lab);
float64_t* ker1=NULL;
float64_t* ker2=NULL;
float64_t* ker3=NULL;
float64_t autosigma=1;
float64_t n1=0;
float64_t n2=0;
float64_t n3=0;
int32_t numdata=0;
gentrainkernel( ker1 , ker2, ker3 , autosigma, n1, n2, n3,x,y);
numdata=x.size();
CCombinedKernel* ker=new CCombinedKernel();
CCustomKernel* kernel1=new CCustomKernel();
CCustomKernel* kernel2=new CCustomKernel();
CCustomKernel* kernel3=new CCustomKernel();
kernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker1, numdata,numdata,false));
kernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker2, numdata,numdata,false));
kernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker3, numdata,numdata,false));
SG_FREE(ker1);
SG_FREE(ker2);
SG_FREE(ker3);
ker->append_kernel(kernel1);
ker->append_kernel(kernel2);
ker->append_kernel(kernel3);
//here comes the core stuff
float64_t regconst=1.0;
CMKLMulticlass* tsvm =new CMKLMulticlass(regconst, ker, lab);
tsvm->set_epsilon(0.0001); // SVM epsilon
// MKL parameters
tsvm->set_mkl_epsilon(0.01); // subkernel weight L2 norm termination criterion
tsvm->set_max_num_mkliters(120); // well it will be just three iterations
tsvm->set_mkl_norm(1.5); // mkl norm
//starting svm training
tsvm->train();
SG_SPRINT("finished svm training\n");
//starting svm testing on training data
CMulticlassLabels* res=CLabelsFactory::to_multiclass(tsvm->apply());
ASSERT(res);
float64_t err=0;
for(int32_t i=0; i<numdata;++i)
{
ASSERT(i< res->get_num_labels());
if (lab->get_int_label(i)!=res->get_int_label(i))
err+=1;
}
err/=(float64_t)res->get_num_labels();
SG_SPRINT("prediction error on training data (3 classes): %f ",err);
SG_SPRINT("random guess error would be: %f \n",2/3.0);
//generate test data
CMulticlassLabels* tlab=NULL;
std::vector<float64_t> tx,ty;
gendata( tx,ty,tlab);
SG_REF(tlab);
float64_t* tker1=NULL;
float64_t* tker2=NULL;
float64_t* tker3=NULL;
gentestkernel(tker1,tker2,tker3, autosigma, n1,n2,n3, x,y, tx,ty);
int32_t numdatatest=tx.size();
CCombinedKernel* tker=new CCombinedKernel();
SG_REF(tker);
CCustomKernel* tkernel1=new CCustomKernel();
CCustomKernel* tkernel2=new CCustomKernel();
CCustomKernel* tkernel3=new CCustomKernel();
tkernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker1,numdata, numdatatest, false));
tkernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest, false));
tkernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest, false));
SG_FREE(tker1);
SG_FREE(tker2);
SG_FREE(tker3);
tker->append_kernel(tkernel1);
tker->append_kernel(tkernel2);
tker->append_kernel(tkernel3);
int32_t numweights;
float64_t* weights=tsvm->getsubkernelweights(numweights);
SG_SPRINT("test kernel weights\n");
for(int32_t i=0; i< numweights;++i)
SG_SPRINT("%f ", weights[i]);
SG_SPRINT("\n");
//set kernel
tker->set_subkernel_weights(SGVector<float64_t>(weights, numweights));
tsvm->set_kernel(tker);
//compute classification error, check mem
CMulticlassLabels* tres=CLabelsFactory::to_multiclass(tsvm->apply());
float64_t terr=0;
for(int32_t i=0; i<numdatatest;++i)
{
ASSERT(i< tres->get_num_labels());
if(tlab->get_int_label(i)!=tres->get_int_label(i))
terr+=1;
}
terr/=(float64_t) tres->get_num_labels();
SG_SPRINT("prediction error on test data (3 classes): %f ",terr);
SG_SPRINT("random guess error would be: %f \n",2/3.0);
SG_UNREF(tsvm);
SG_UNREF(res);
SG_UNREF(tres);
SG_UNREF(lab);
SG_UNREF(tlab);
SG_UNREF(tker);
SG_SPRINT( "finished \n");
}
namespace shogun
{
extern Version* sg_version;
extern SGIO* sg_io;
}
int main()
{
init_shogun(&print_message, &print_warning,
&print_error);
try
{
sg_version->print_version();
sg_io->set_loglevel(MSG_INFO);
tester();
}
catch(ShogunException & sh)
{
printf("%s",sh.get_exception_string());
}
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>
#define EPSILON 1e-5
using namespace shogun;
// Training data
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);
// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CECOCStrategy(new CECOCOVREncoder(), new CECOCHDDecoder()), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
// Classify the training examples and show the results
CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< int32_t > out_labels = output->get_int_labels();
SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);
// Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>
#define EPSILON 1e-5
using namespace shogun;
/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);
CECOCDiscriminantEncoder *encoder = new CECOCDiscriminantEncoder();
encoder->set_features(features);
encoder->set_labels(labels);
// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CECOCStrategy(encoder, new CECOCHDDecoder()), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
// Classify the training examples and show the results
CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< int32_t > out_labels = output->get_int_labels();
SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);
// Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>
#define EPSILON 1e-5
using namespace shogun;
/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);
// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CECOCStrategy(new CECOCRandomDenseEncoder(), new CECOCHDDecoder()), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
// Classify the training examples and show the results
CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< int32_t > out_labels = output->get_int_labels();
SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);
// Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/MulticlassStrategy.h>
#include <shogun/multiclass/MulticlassOneVsOneStrategy.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>
#define EPSILON 1e-5
using namespace shogun;
/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);
// Create a multiclass svm classifier that consists of several of the previous one
// There are several heuristics are implemented:
// OVA_NORM, OVA_SOFTMAX
// OVO_PRICE, OVO_HASTIE, OVO_HAMAMURA
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CMulticlassOneVsOneStrategy(OVO_HASTIE), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
// Classify the training examples and show the results
CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< int32_t > out_labels = output->get_int_labels();
SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);
for (int32_t i=0; i<output->get_num_labels(); i++)
{
SG_SPRINT("out_values[%d] = ", i);
SGVector<float64_t> out_values = output->get_multiclass_confidences(i);
SGVector<float64_t>::display_vector(out_values.vector, out_values.vlen);
SG_SPRINT("\n");
}
//Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
//sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DenseSubsetFeatures.h>
#include <shogun/base/init.h>
#include <shogun/multiclass/tree/RelaxedTree.h>
#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/kernel/GaussianKernel.h>
#define EPSILON 1e-5
using namespace shogun;
const char* fname_feats = "../data/7class_example4_train.dense";
const char* fname_labels = "../data/7class_example4_train.label";
int main(int argc, char** argv)
{
init_shogun_with_defaults();
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create RelaxedTree Machine
CRelaxedTree *machine = new CRelaxedTree();
SG_REF(machine);
machine->set_labels(labels);
CKernel *kernel = new CGaussianKernel();
SG_REF(kernel);
machine->set_kernel(kernel);
CMulticlassLibLinear *svm = new CMulticlassLibLinear();
machine->set_machine_for_confusion_matrix(svm);
machine->train(features);
CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply());
CMulticlassAccuracy *evaluator = new CMulticlassAccuracy();
SG_SPRINT("Accuracy = %.4f\n", evaluator->evaluate(output, labels));
// Free resources
SG_UNREF(machine);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
SG_UNREF(evaluator);
SG_UNREF(kernel);
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DenseSubsetFeatures.h>
#include <shogun/base/init.h>
#include <shogun/multiclass/ShareBoost.h>
#define EPSILON 1e-5
using namespace shogun;
const char* fname_feats = "../data/7class_example4_train.dense";
const char* fname_labels = "../data/7class_example4_train.label";
int main(int argc, char** argv)
{
init_shogun_with_defaults();
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes());
// Create ShareBoost Machine
CShareBoost *machine = new CShareBoost(features, labels, 10);
SG_REF(machine);
machine->train();
SGVector<int32_t> activeset = machine->get_activeset();
SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
for (int32_t i=0; i < activeset.vlen; ++i)
SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);
CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
SG_REF(subset_fea);
CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply(subset_fea));
int32_t correct = 0;
for (int32_t i=0; i < output->get_num_labels(); ++i)
if (output->get_int_label(i) == labels->get_int_label(i))
correct++;
SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());
// Free resources
SG_UNREF(machine);
SG_UNREF(output);
SG_UNREF(subset_fea);
SG_UNREF(features);
SG_UNREF(labels);
exit_shogun();
return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/MulticlassLibSVM.h>
#include <shogun/base/init.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
index_t num_vec=3;
index_t num_feat=2;
index_t num_class=2;
// create some data
SGMatrix<float64_t> matrix(num_feat, num_vec);
SGVector<float64_t>::range_fill_vector(matrix.matrix, num_feat*num_vec);
// create vectors
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
// create three labels
CMulticlassLabels* labels=new CMulticlassLabels(num_vec);
for (index_t i=0; i<num_vec; ++i)
labels->set_label(i, i%num_class);
// create gaussian kernel with cache 10MB, width 0.5
CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
kernel->init(features, features);
// create libsvm with C=10 and train
CMulticlassLibSVM* svm = new CMulticlassLibSVM(10, kernel, labels);
svm->train();
// classify on training examples
CMulticlassLabels* output=CLabelsFactory::to_multiclass(svm->apply());
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels(),
"batch output");
/* assert that batch apply and apply(index_t) give same result */
for (index_t i=0; i<output->get_num_labels(); ++i)
{
float64_t label=svm->apply_one(i);
SG_SPRINT("single output[%d]=%f\n", i, label);
ASSERT(output->get_label(i)==label);
}
SG_UNREF(output);
// free up memory
SG_UNREF(svm);
exit_shogun();
return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/MulticlassOneVsOneStrategy.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>
#define EPSILON 1e-5
using namespace shogun;
/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
SG_REF(svm);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(true);
// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
new CMulticlassOneVsOneStrategy(), (CDotFeatures*) features, svm, labels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
// Classify the training examples and show the results
CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< int32_t > out_labels = output->get_int_labels();
SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);
//Free resources
SG_UNREF(mc_svm);
SG_UNREF(svm);
SG_UNREF(output);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
#include <shogun/features/Labels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/classifier/NearestCentroid.h>
#include <shogun/base/init.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(){
init_shogun(&print_message);
index_t num_vec=7;
index_t num_feat=2;
index_t num_class=2;
// create some data
SGMatrix<float64_t> matrix(num_feat, num_vec);
CMath::range_fill_vector(matrix.matrix, num_feat*num_vec);
// Create features ; shogun will now own the matrix created
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
CMath::display_matrix(matrix.matrix,num_feat,num_vec);
//Create labels
CLabels* labels=new CLabels(num_vec);
for (index_t i=0; i<num_vec; ++i)
labels->set_label(i, i%num_class);
//Create Euclidean Distance
CEuclideanDistance* distance = new CEuclideanDistance(features,features);
//Create Nearest Centroid
CNearestCentroid* nearest_centroid = new CNearestCentroid(distance, labels);
nearest_centroid->train();
// classify on training examples
CLabels* output=nearest_centroid->apply();
CMath::display_vector(output->get_labels().vector, output->get_num_labels(),
"batch output");
SG_UNREF(output);
// free up memory
SG_UNREF(nearest_centroid);
exit_shogun();
return 0;
}#include <shogun/base/init.h>
#include <shogun/features/Labels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/svm/NewtonSVM.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc,char *argv[])
{
init_shogun(&print_message,&print_message,&print_message);//initialising shogun without giving arguments shogun wont be able to print
int32_t x_n=4,x_d=2;//X dimensions : x_n for no of datapoints and x_d for dimensionality of data
SGMatrix<float64_t> fmatrix(x_d,x_n);
SG_SPRINT("\nTEST 1:\n\n");
/*Initialising Feature Matrix */
for (int i=0; i<x_n*x_d; i++)
fmatrix.matrix[i] = i+1;
SG_SPRINT("FEATURE MATRIX :\n");
CMath::display_matrix(fmatrix.matrix,x_d,x_n);
CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(fmatrix);
SG_REF(features);
/*Creating random labels */
CLabels* labels=new CLabels(x_n);
// create labels, two classes
labels->set_label(0,1);
labels->set_label(1,-1);
labels->set_label(2,1);
labels->set_label(3,1);
SG_REF(labels);
/*Working with Newton SVM */
float64_t lambda=1.0;
int32_t iter=20;
CNewtonSVM *nsvm = new CNewtonSVM(lambda,features,labels,iter);
SG_REF(nsvm);
nsvm->train();
SG_UNREF(labels);
SG_UNREF(nsvm);
SG_SPRINT("TEST 2:\n\n");
x_n=5;
x_d=3;
SGMatrix<float64_t> fmatrix2(x_d,x_n);
for (int i=0; i<x_n*x_d; i++)
fmatrix2.matrix[i] = i+1;
SG_SPRINT("FEATURE MATRIX :\n");
CMath::display_matrix(fmatrix2.matrix,x_d,x_n);
features->set_feature_matrix(fmatrix2);
SG_REF(features);
/*Creating random labels */
CLabels* labels2=new CLabels(x_n);
// create labels, two classes
labels2->set_label(0,1);
labels2->set_label(1,-1);
labels2->set_label(2,1);
labels2->set_label(3,1);
labels2->set_label(4,-1);
SG_REF(labels2);
/*Working with Newton SVM */
lambda=1.0;
iter=20;
CNewtonSVM *nsvm2 = new CNewtonSVM(lambda,features,labels2,iter);
SG_REF(nsvm2);
nsvm2->train();
SG_UNREF(labels2);
SG_UNREF(nsvm2);
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Fernando Jose Iglesias Garcia
* Copyright (C) 2012 Fernando Jose Iglesias Garcia
*/
#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/QDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/features/DataGenerator.h>
using namespace shogun;
#define NUM 50
#define DIMS 2
#define CLASSES 2
void test()
{
#ifdef HAVE_LAPACK
SGVector< float64_t > lab(CLASSES*NUM);
SGMatrix< float64_t > feat(DIMS, CLASSES*NUM);
feat = CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS);
for( int i = 0 ; i < CLASSES ; ++i )
for( int j = 0 ; j < NUM ; ++j )
lab[i*NUM+j] = double(i);
// Create train labels
CMulticlassLabels* labels = new CMulticlassLabels(lab);
// Create train features
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feat);
// Create QDA classifier
CQDA* qda = new CQDA(features, labels);
SG_REF(qda);
qda->train();
// Classify and display output
CMulticlassLabels* output = CLabelsFactory::to_multiclass(qda->apply());
SG_REF(output);
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels());
// Free memory
SG_UNREF(output);
SG_UNREF(qda);
#endif // HAVE_LAPACK
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/lib/SGStringList.h>
using namespace shogun;
#ifdef USE_SVMLIGHT
void test_svmlight()
{
/* data is random length strings with only zeros (A) or ones (B) */
index_t num_train=100;
index_t num_test=50;
index_t max_length=100;
float64_t p_x=0.5; // probability for class A
float64_t mostly_prob=0.8;
CDenseLabels* labels=new CBinaryLabels(num_train+num_test);
CMath::init_random(17);
SGStringList<char> data(num_train+num_test, max_length);
for (index_t i=0; i<num_train+num_test; ++i)
{
/* determine length */
index_t length=CMath::random(1, max_length);
/* allocate string */
data.strings[i]=SGString<char>(length);
/* fill with elements and set label */
if (p_x<CMath::random(0.0, 1.0))
{
labels->set_label(i, 1);
for (index_t j=0; j<length; ++j)
{
char c=mostly_prob<CMath::random(0.0, 1.0) ? '0' : '1';
data.strings[i].string[j]=c;
}
}
else
{
labels->set_label(i, -1);
for (index_t j=0; j<length; ++j)
{
char c=mostly_prob<CMath::random(0.0, 1.0) ? '1' : '0';
data.strings[i].string[j]=c;
}
}
SG_SPRINT("datum %d, class %d:\t", i, labels->get_int_label(i));
for (index_t j=0; j<length; ++j)
SG_SPRINT("%c", data.strings[i].string[j]);
SG_SPRINT("\n");
}
CStringFeatures<char>* feats=new CStringFeatures<char>(data, BINARY);
/* copy training and test data */
SGVector<index_t> train_inds(num_train);
train_inds.range_fill();
SGVector<index_t> test_inds(num_test);
test_inds.range_fill();
test_inds.add(num_train);
CStringFeatures<char>* feats_train=
(CStringFeatures<char>*)feats->copy_subset(train_inds);
CStringFeatures<char>* feats_test=
(CStringFeatures<char>*)feats->copy_subset(test_inds);
labels->add_subset(train_inds);
CLabels* labels_train=new CBinaryLabels(labels->get_labels_copy());
labels->remove_subset();
labels->add_subset(test_inds);
CLabels* labels_test=new CBinaryLabels(labels->get_labels_copy());
labels->remove_subset();
/* string kernel */
CDistantSegmentsKernel* kernel=new CDistantSegmentsKernel(10, 2, 2);
/* SVM training and testing without precomputing the kernel */
float64_t C=1;
CSVM* svm=new CSVMLight(C, kernel, labels_train);
// CSVM* svm=new CLibSVM(C, kernel, labels_train);
svm->parallel->set_num_threads(1);
svm->set_store_model_features(false);
svm->train(feats_train);
SGVector<float64_t> alphas=svm->get_alphas();
SGVector<index_t> svs=svm->get_support_vectors();
float64_t bias=svm->get_bias();
CBinaryLabels* predictions=(CBinaryLabels*)svm->apply(feats_test);
alphas.display_vector("alphas");
svs.display_vector("svs");
SG_SPRINT("bias: %f\n", bias);
/* now the same with a precopumputed kernel */
kernel->init(feats, feats);
CCustomKernel* precomputed=new CCustomKernel(kernel);
precomputed->add_row_subset(train_inds);
precomputed->add_col_subset(train_inds);
SGMatrix<float64_t> km_train=precomputed->get_kernel_matrix();
precomputed->remove_col_subset();
precomputed->add_col_subset(test_inds);
SGMatrix<float64_t> km_test=precomputed->get_kernel_matrix();
precomputed->remove_row_subset();
precomputed->remove_col_subset();
SGMatrix<float64_t> km=precomputed->get_kernel_matrix();
// km.display_matrix("FULL");
// km_train.display_matrix("TRAIN");
// km_test.display_matrix("TEST");
/* make sure matrices are correct */
for (index_t i=0; i<km_train.num_rows; ++i)
{
for (index_t j=0; j<km_train.num_cols; ++j)
ASSERT(km_train(i, j)==km(i, j));
}
for (index_t i=0; i<km_test.num_rows; ++i)
{
for (index_t j=0; j<km_test.num_cols; ++j)
ASSERT(km_test(i, j)==km(i, j+num_train));
}
/* train and test again on custom kernel */
svm->set_kernel(new CCustomKernel(km_train));
svm->train();
SGVector<float64_t> alphas_precomputed=svm->get_alphas();
SGVector<index_t> svs_precomputed=svm->get_support_vectors();
float64_t bias_precomputed=svm->get_bias();
alphas_precomputed.display_vector("alphas_precomputed");
svs_precomputed.display_vector("svs_precomputed");
SG_SPRINT("bias_precomputed: %f\n", bias_precomputed);
svm->set_kernel(new CCustomKernel(km_test));
CBinaryLabels* predictions_precomputed=(CBinaryLabels*)svm->apply();
/* assert that the SV, alphas and b are equal, sort before (they may have
* a different ordering */
CMath::qsort(alphas.vector, alphas.vlen);
CMath::qsort(alphas_precomputed.vector, alphas_precomputed.vlen);
CMath::qsort(svs.vector, svs.vlen);
CMath::qsort(svs_precomputed.vector, svs_precomputed.vlen);
ASSERT(alphas.vlen==alphas_precomputed.vlen);
ASSERT(svs.vlen==svs_precomputed.vlen);
for (index_t i=0; i<alphas.vlen; ++i)
{
ASSERT(CMath::abs(alphas[i]-alphas_precomputed[i])<1E-3);
ASSERT(svs[i]==svs_precomputed[i]);
}
ASSERT(CMath::abs(bias-bias_precomputed)<1E-3);
/* assert that predictions are the same */
predictions->get_int_labels().display_vector("predictions");
predictions_precomputed->get_int_labels().
display_vector("predictions_precomputed");
for (index_t i=0; i<predictions->get_num_labels(); ++i)
{
ASSERT(predictions->get_int_label(i)==
predictions_precomputed->get_int_label(i));
}
/* clean up */
SG_SPRINT("cleaning up\n");
SG_UNREF(svm);
SG_UNREF(precomputed);
SG_UNREF(labels);
SG_UNREF(labels_test);
SG_UNREF(predictions);
SG_UNREF(predictions_precomputed);
SG_UNREF(feats_train);
SG_UNREF(feats_test);
}
int main()
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_svmlight();
exit_shogun();
return 0;
}
#else
int main(int argc, char **argv)
{
return 0;
}
#endif
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/clustering/KMeans.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/distance/MinkowskiMetric.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
int32_t num_clusters=4;
int32_t num_features=11;
int32_t dim_features=3;
int32_t num_vectors_per_cluster=5;
float64_t cluster_std_dev=2.0;
/* build random cluster centers */
SGMatrix<float64_t> cluster_centers(dim_features, num_clusters);
SGVector<float64_t>::random_vector(cluster_centers.matrix, dim_features*num_clusters,
-10.0, 10.0);
SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
cluster_centers.num_cols, "cluster centers");
/* create data around clusters */
SGMatrix<float64_t> data(dim_features, num_clusters*num_vectors_per_cluster);
for (index_t i=0; i<num_clusters; ++i)
{
for (index_t j=0; j<dim_features; ++j)
{
for (index_t k=0; k<num_vectors_per_cluster; ++k)
{
index_t idx=i*dim_features*num_vectors_per_cluster;
idx+=j;
idx+=k*dim_features;
float64_t entry=cluster_centers.matrix[i*dim_features+j];
data.matrix[idx]=CMath::normal_random(entry, cluster_std_dev);
}
}
}
/* create features, SG_REF to avoid deletion */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(data);
SG_REF(features);
/* create labels for cluster centers */
CMulticlassLabels* labels=new CMulticlassLabels(num_features);
for (index_t i=0; i<num_features; ++i)
labels->set_label(i, i%2==0 ? 0 : 1);
/* create distance */
CEuclideanDistance* distance=new CEuclideanDistance(features, features);
/* create distance machine */
CKMeans* clustering=new CKMeans(num_clusters, distance);
clustering->train(features);
/* build clusters */
CMulticlassLabels* result=CLabelsFactory::to_multiclass(clustering->apply());
for (index_t i=0; i<result->get_num_labels(); ++i)
SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i));
/* print cluster centers */
CDenseFeatures<float64_t>* centers=
(CDenseFeatures<float64_t>*)distance->get_lhs();
SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix();
SGMatrix<float64_t>::display_matrix(centers_matrix.matrix, centers_matrix.num_rows,
centers_matrix.num_cols, "learned centers");
SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
cluster_centers.num_cols, "real centers");
/* clean up */
SG_UNREF(result);
SG_UNREF(centers);
SG_UNREF(clustering);
SG_UNREF(labels);
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/DiffusionMaps.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CDiffusionMaps* dmaps = new CDiffusionMaps();
dmaps->set_target_dim(2);
dmaps->set_t(10);
dmaps->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = dmaps->embed(features);
SG_UNREF(embedding);
SG_UNREF(dmaps);
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Sergey Lisitsyn
*/
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/FactorAnalysis.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CFactorAnalysis* fa = new CFactorAnalysis();
CDenseFeatures<double>* embedding = fa->embed(features);
SG_UNREF(embedding);
SG_UNREF(fa);
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/HessianLocallyLinearEmbedding.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CHessianLocallyLinearEmbedding* hlle = new CHessianLocallyLinearEmbedding();
hlle->set_target_dim(2);
hlle->set_k(8);
hlle->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = hlle->embed(features);
SG_UNREF(embedding);
SG_UNREF(hlle);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/Isomap.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CIsomap* isomap = new CIsomap();
isomap->set_target_dim(2);
isomap->set_landmark(false);
isomap->set_k(4);
isomap->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = isomap->embed(features);
SG_UNREF(embedding);
SG_UNREF(isomap);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Kevin Hughes
*
* Thanks to Andreas Ziehe
*/
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <iostream>
using namespace shogun;
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/converter/ica/Jade.h>
#include <shogun/evaluation/ica/PermutationMatrix.h>
#include <shogun/evaluation/ica/AmariIndex.h>
using namespace Eigen;
void test()
{
// Generate sample data
CMath::init_random(0);
int n_samples = 2000;
VectorXd time(n_samples, true);
time.setLinSpaced(n_samples,0,10);
// Source Signals
MatrixXd S(2,n_samples);
for(int i = 0; i < n_samples; i++)
{
// Sin wave
S(0,i) = sin(2*time[i]);
S(0,i) += 0.2*CMath::randn_double();
// Square wave
S(1,i) = sin(3*time[i]) < 0 ? -1 : 1;
S(1,i) += 0.2*CMath::randn_double();
}
// Standardize data
VectorXd avg = S.rowwise().sum() / n_samples;
VectorXd std = ((S.colwise() - avg).array().pow(2).rowwise().sum() / n_samples).array().sqrt();
for(int i = 0; i < n_samples; i++)
S.col(i) = S.col(i).cwiseQuotient(std);
// Mixing Matrix
SGMatrix<float64_t> mixing_matrix(2,2);
Map<MatrixXd> A(mixing_matrix.matrix,2,2);
A(0,0) = 1; A(0,1) = 0.5;
A(1,0) = 0.5; A(1,1) = 1;
std::cout << "Mixing Matrix:" << std::endl;
std::cout << A << std::endl << std::endl;
// Mix signals
SGMatrix<float64_t> X(2,n_samples);
Map<MatrixXd> EX(X.matrix,2,n_samples);
EX = A * S;
CDenseFeatures< float64_t >* mixed_signals = new CDenseFeatures< float64_t >(X);
// Separate
CJade* jade = new CJade();
SG_REF(jade);
CFeatures* signals = jade->apply(mixed_signals);
SG_REF(signals);
// Close to a permutation matrix (with random scales)
Map<MatrixXd> EA(jade->get_mixing_matrix().matrix,2,2);
std::cout << "Estimated Mixing Matrix:" << std::endl;
std::cout << EA << std::endl << std::endl;
SGMatrix<float64_t> P(2,2);
Eigen::Map<MatrixXd> EP(P.matrix,2,2);
EP = EA.inverse() * A;
bool isperm = is_permutation_matrix(P);
std::cout << "EA^-1 * A == Permuatation Matrix is: " << isperm << std::endl;
float64_t amari_err = amari_index(jade->get_mixing_matrix(), mixing_matrix, true);
std::cout << "Amari Error: " << amari_err << std::endl;
SG_UNREF(jade);
SG_UNREF(mixed_signals);
SG_UNREF(signals);
return;
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/KernelLocallyLinearEmbedding.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CKernelLocallyLinearEmbedding* klle = new CKernelLocallyLinearEmbedding();
CKernel* kernel = new CLinearKernel();
klle->set_target_dim(2);
klle->set_k(4);
klle->set_kernel(kernel);
klle->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = klle->embed(features);
SG_UNREF(embedding);
SG_UNREF(klle);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LaplacianEigenmaps.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CLaplacianEigenmaps* lem = new CLaplacianEigenmaps();
lem->set_target_dim(2);
lem->set_k(10);
lem->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = lem->embed(features);
SG_UNREF(embedding);
SG_UNREF(lem);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LinearLocalTangentSpaceAlignment.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CLinearLocalTangentSpaceAlignment* lltsa = new CLinearLocalTangentSpaceAlignment();
lltsa->set_target_dim(2);
lltsa->set_k(4);
lltsa->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = lltsa->embed(features);
SG_UNREF(embedding);
SG_UNREF(lltsa);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocalityPreservingProjections.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CLocalityPreservingProjections* lpp = new CLocalityPreservingProjections();
lpp->set_target_dim(2);
lpp->set_k(10);
lpp->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = lpp->embed(features);
SG_UNREF(embedding);
SG_UNREF(lpp);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocallyLinearEmbedding.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CLocallyLinearEmbedding* lle = new CLocallyLinearEmbedding();
lle->set_target_dim(2);
lle->set_k(4);
lle->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = lle->embed(features);
SG_UNREF(embedding);
SG_UNREF(lle);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocalTangentSpaceAlignment.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CLocalTangentSpaceAlignment* ltsa = new CLocalTangentSpaceAlignment();
ltsa->set_target_dim(2);
ltsa->set_k(4);
ltsa->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = ltsa->embed(features);
SG_UNREF(embedding);
SG_UNREF(ltsa);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/MultidimensionalScaling.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CMultidimensionalScaling* mds = new CMultidimensionalScaling();
mds->set_target_dim(2);
mds->set_landmark(true);
mds->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = mds->embed(features);
SG_UNREF(embedding);
SG_UNREF(mds);
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/NeighborhoodPreservingEmbedding.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
int N = 100;
int dim = 3;
float64_t* matrix = new double[N*dim];
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N));
SG_REF(features);
CNeighborhoodPreservingEmbedding* npe = new CNeighborhoodPreservingEmbedding();
npe->set_target_dim(2);
npe->set_k(15);
npe->parallel->set_num_threads(4);
CDenseFeatures<double>* embedding = npe->embed(features);
SG_UNREF(embedding);
SG_UNREF(npe);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Fernando José Iglesias GarcÃa
* Copyright (C) 2012 Fernando José Iglesias GarcÃa
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/StochasticProximityEmbedding.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
int N = 100;
int dim = 3;
// Generate toy data
SGMatrix< float64_t > matrix(dim, N);
for (int i=0; i<N*dim; i++)
matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);
CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(matrix);
SG_REF(features);
// Create embedding and set parameters for global strategy
CStochasticProximityEmbedding* spe = new CStochasticProximityEmbedding();
spe->set_target_dim(2);
spe->set_strategy(SPE_GLOBAL);
spe->set_nupdates(40);
SG_REF(spe);
// Apply embedding with global strategy
CDenseFeatures< float64_t >* embedding = spe->embed(features);
SG_REF(embedding);
// Set parameters for local strategy
spe->set_strategy(SPE_LOCAL);
spe->set_k(12);
// Apply embedding with local strategy
SG_UNREF(embedding);
embedding = spe->embed(features);
SG_REF(embedding);
// Free memory
SG_UNREF(embedding);
SG_UNREF(spe);
SG_UNREF(features);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void test_cross_validation()
{
/* data matrix dimensions */
index_t num_vectors=40;
index_t num_features=5;
/* data means -1, 1 in all components, std deviation of 3 */
SGVector<float64_t> mean_1(num_features);
SGVector<float64_t> mean_2(num_features);
SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0);
SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0);
float64_t sigma=3;
SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2");
/* fill data matrix around mean */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
for (index_t j=0; j<num_features; ++j)
{
float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
}
}
/* training features */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels +/- 1 for each cluster */
SGVector<float64_t> lab(num_vectors);
for (index_t i=0; i<num_vectors; ++i)
lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;
CBinaryLabels* labels=new CBinaryLabels(lab);
/* gaussian kernel */
int32_t kernel_cache=100;
int32_t width=10;
CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, width);
kernel->init(features, features);
/* create svm via libsvm */
float64_t svm_C=10;
float64_t svm_eps=0.0001;
CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
svm->set_epsilon(svm_eps);
/* train and output */
svm->train(features);
CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply(features));
for (index_t i=0; i<num_vectors; ++i)
SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i));
/* evaluation criterion */
CContingencyTableEvaluation* eval_crit=
new CContingencyTableEvaluation(ACCURACY);
/* evaluate training error */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training error: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
* but should be a really coarse check to see if everything is going
* approx. right */
ASSERT(eval_result<2);
/* splitting strategy */
index_t n_folds=5;
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.05);
/* actual evaluation */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CrossValidationResult!");
result->print_result();
/* clean up */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(features);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
sg_io->set_loglevel(MSG_DEBUG);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
* Written (W) 2013 Saurabh Mahindre
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/KNN.h>
#include <shogun/io/SGIO.h>
#include <shogun/io/CSVFile.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/distance/EuclideanDistance.h>
using namespace shogun;
// Prepare to read a file for the training data
const char fname_feats[] = "../data/fm_train_real.dat";
const char fname_labels[] = "../data/label_train_multiclass.dat";
void test_cross_validation()
{
index_t k =4;
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
/* create knn */
CEuclideanDistance* distance = new CEuclideanDistance(features, features);
CKNN* knn=new CKNN (k, distance, labels);
/* train and output */
knn->train(features);
CMulticlassLabels* output=CLabelsFactory::to_multiclass(knn->apply(features));
for (index_t i=0; i<features->get_num_vectors(); ++i)
SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i));
/* evaluation criterion */
CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy ();
/* evaluate training error */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training accuracy: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
* but should be a really coarse check to see if everything is going
* approx. right */
ASSERT(eval_result<2);
/* splitting strategy */
index_t n_folds=5;
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(knn, features, labels,
splitting, eval_crit);
cross->set_num_runs(1);
// cross->set_conf_int_alpha(0.05);
/* actual evaluation */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
result->print_result();
/* clean up */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/lib/Time.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void test_cross_validation()
{
/* data matrix dimensions */
index_t num_vectors=50;
index_t num_features=5;
/* data means -1, 1 in all components, std deviation of sigma */
SGVector<float64_t> mean_1(num_features);
SGVector<float64_t> mean_2(num_features);
SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0);
SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0);
float64_t sigma=1.5;
/* fill data matrix around mean */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
for (index_t j=0; j<num_features; ++j)
{
float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
}
}
/* training features */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels +/- 1 for each cluster */
SGVector<float64_t> lab(num_vectors);
for (index_t i=0; i<num_vectors; ++i)
lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;
CBinaryLabels* labels=new CBinaryLabels(lab);
/* gaussian kernel */
CGaussianKernel* kernel=new CGaussianKernel();
kernel->set_width(10);
kernel->init(features, features);
/* create svm via libsvm */
float64_t svm_C=1;
float64_t svm_eps=0.0001;
CSVM* svm=new CLibSVM(svm_C, kernel, labels);
svm->set_epsilon(svm_eps);
/* train and output the normal way */
SG_SPRINT("starting normal training\n");
svm->train(features);
CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply(features));
/* evaluation criterion */
CContingencyTableEvaluation* eval_crit=
new CContingencyTableEvaluation(ACCURACY);
/* evaluate training error */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training accuracy: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
* but should be a really coarse check to see if everything is going
* approx. right */
ASSERT(eval_result<2);
/* splitting strategy */
index_t n_folds=3;
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);
cross->set_num_runs(5);
// cross->set_conf_int_alpha(0.05);
CCrossValidationResult* tmp;
/* no locking */
index_t repetitions=5;
SG_SPRINT("unlocked x-val\n");
kernel->init(features, features);
cross->set_autolock(false);
CTime time;
time.start();
for (index_t i=0; i<repetitions; ++i)
{
tmp = (CCrossValidationResult*)cross->evaluate();
SG_UNREF(tmp);
}
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
/* auto_locking in every iteration of this loop (better, not so nice) */
SG_SPRINT("locked in every iteration x-val\n");
cross->set_autolock(true);
time.start();
for (index_t i=0; i<repetitions; ++i)
{
tmp = (CCrossValidationResult*)cross->evaluate();
SG_UNREF(tmp);
}
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
/* lock once before, (no locking/unlocking in this loop) */
svm->data_lock(labels, features);
SG_SPRINT("locked x-val\n");
time.start();
for (index_t i=0; i<repetitions; ++i)
{
tmp = (CCrossValidationResult*)cross->evaluate();
SG_UNREF(tmp);
}
time.stop();
SG_SPRINT("%f sec\n", time.cur_time_diff());
/* clean up */
SG_UNREF(cross);
SG_UNREF(features);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationPrintOutput.h>
#include <shogun/evaluation/CrossValidationMKLStorage.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/mathematics/Statistics.h>
using namespace shogun;
void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat,
float64_t dist)
{
index_t dims=feat.num_rows;
index_t num=lab.vlen;
for (int32_t i=0; i<num; i++)
{
if (i<num/2)
{
lab[i]=-1.0;
for (int32_t j=0; j<dims; j++)
feat(j, i)=CMath::random(0.0, 1.0)+dist;
}
else
{
lab[i]=1.0;
for (int32_t j=0; j<dims; j++)
feat(j, i)=CMath::random(0.0, 1.0)-dist;
}
}
lab.display_vector("lab");
feat.display_matrix("feat");
}
void test_mkl_cross_validation()
{
/* generate random data */
index_t num=10;
index_t dims=2;
float64_t dist=0.5;
SGVector<float64_t> lab(num);
SGMatrix<float64_t> feat(dims, num);
gen_rand_data(lab, feat, dist);
/*create train labels */
CLabels* labels=new CBinaryLabels(lab);
/* create train features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
features->set_feature_matrix(feat);
SG_REF(features);
/* create combined features */
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
SG_REF(comb_features);
/* create multiple gaussian kernels */
CCombinedKernel* kernel=new CCombinedKernel();
kernel->append_kernel(new CGaussianKernel(10, 0.1));
kernel->append_kernel(new CGaussianKernel(10, 1));
kernel->append_kernel(new CGaussianKernel(10, 2));
kernel->init(comb_features, comb_features);
SG_REF(kernel);
/* create mkl using libsvm, due to a mem-bug, interleaved is not possible */
CMKLClassification* svm=new CMKLClassification(new CLibSVM());
svm->set_interleaved_optimization_enabled(false);
svm->set_kernel(kernel);
SG_REF(svm);
/* create cross-validation instance */
index_t num_folds=3;
CSplittingStrategy* split=new CStratifiedCrossValidationSplitting(labels,
num_folds);
CEvaluation* eval=new CContingencyTableEvaluation(ACCURACY);
CCrossValidation* cross=new CCrossValidation(svm, comb_features, labels, split, eval, false);
/* add print output listener and mkl storage listener */
cross->add_cross_validation_output(new CCrossValidationPrintOutput());
CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage();
cross->add_cross_validation_output(mkl_storage);
/* perform cross-validation, this will print loads of information
* (caused by the CCrossValidationPrintOutput instance attached to it) */
CEvaluationResult* result=cross->evaluate();
/* print mkl weights */
SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights();
weights.display_matrix("mkl weights");
/* print mean and variance of each kernel weight. These could for example
* been used to compute confidence intervals */
CStatistics::matrix_mean(weights, false).display_vector("mean per kernel");
CStatistics::matrix_variance(weights, false).display_vector("variance per kernel");
CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel");
SG_UNREF(result);
/* again for two runs */
cross->set_num_runs(2);
result=cross->evaluate();
/* print mkl weights */
weights=mkl_storage->get_mkl_weights();
weights.display_matrix("mkl weights");
/* print mean and variance of each kernel weight. These could for example
* been used to compute confidence intervals */
CStatistics::matrix_mean(weights, false).display_vector("mean per kernel");
CStatistics::matrix_variance(weights, false).display_vector("variance per kernel");
CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel");
/* clean up */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(kernel);
SG_UNREF(features);
SG_UNREF(comb_features);
SG_UNREF(svm);
}
int main()
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_mkl_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/io/SGIO.h>
#include <shogun/io/CSVFile.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
using namespace shogun;
// Prepare to read a file for the training data
const char fname_feats[] = "../data/fm_train_real.dat";
const char fname_labels[] = "../data/label_train_multiclass.dat";
void test_cross_validation()
{
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
/* create svm via libsvm */
float64_t svm_C=10;
float64_t svm_eps=0.0001;
CMulticlassLibLinear* svm=new CMulticlassLibLinear(svm_C, features, labels);
svm->set_epsilon(svm_eps);
/* train and output */
svm->train(features);
CMulticlassLabels* output=CLabelsFactory::to_multiclass(svm->apply(features));
for (index_t i=0; i<features->get_num_vectors(); ++i)
SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i));
/* evaluation criterion */
CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy ();
/* evaluate training error */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training accuracy: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
* but should be a really coarse check to see if everything is going
* approx. right */
ASSERT(eval_result<2);
/* splitting strategy */
index_t n_folds=5;
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(svm, features, labels,
splitting, eval_crit);
cross->set_num_runs(1);
// cross->set_conf_int_alpha(0.05);
/* actual evaluation */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
result->print_result();
/* clean up */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(features);
SG_UNREF(labels);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 yoo, thereisnoknife@gmail.com
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/kernel/PolyKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/classifier/mkl/MKLMulticlass.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
using namespace shogun;
/* cross-validation instances */
const index_t n_folds=2;
const index_t n_runs=2;
/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";
void test_multiclass_mkl_cv()
{
/* init random number generator for reproducible results of cross-validation in the light of ASSERT(result->mean>0.81); some lines down below */
sg_rand->set_seed(12);
/* dense features from matrix */
CCSVFile* feature_file = new CCSVFile(fname_feats);
SGMatrix<float64_t> mat=SGMatrix<float64_t>();
mat.load(feature_file);
SG_UNREF(feature_file);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
SG_REF(features);
/* labels from vector */
CCSVFile* label_file = new CCSVFile(fname_labels);
SGVector<float64_t> label_vec;
label_vec.load(label_file);
SG_UNREF(label_file);
CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
SG_REF(labels);
/* combined features and kernel */
CCombinedFeatures *cfeats=new CCombinedFeatures();
CCombinedKernel *cker=new CCombinedKernel();
SG_REF(cfeats);
SG_REF(cker);
/** 1st kernel: gaussian */
cfeats->append_feature_obj(features);
cker->append_kernel(new CGaussianKernel(features, features, 1.2, 10));
/** 2nd kernel: linear */
cfeats->append_feature_obj(features);
cker->append_kernel(new CLinearKernel(features, features));
/** 3rd kernel: poly */
cfeats->append_feature_obj(features);
cker->append_kernel(new CPolyKernel(features, features, 2, true, 10));
cker->init(cfeats, cfeats);
/* create mkl instance */
CMKLMulticlass* mkl=new CMKLMulticlass(1.2, cker, labels);
SG_REF(mkl);
mkl->set_epsilon(0.00001);
mkl->parallel->set_num_threads(1);
mkl->set_mkl_epsilon(0.001);
mkl->set_mkl_norm(1.5);
/* train to see weights */
mkl->train();
cker->get_subkernel_weights().display_vector("weights");
CMulticlassAccuracy* eval_crit=new CMulticlassAccuracy();
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, n_folds);
CCrossValidation *cross=new CCrossValidation(mkl, cfeats, labels, splitting,
eval_crit);
cross->set_autolock(false);
cross->set_num_runs(n_runs);
// cross->set_conf_int_alpha(0.05);
/* perform x-val and print result */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
SG_SPRINT("mean of %d %d-fold x-val runs: %f\n", n_runs, n_folds,
result->mean);
/* assert high accuracy */
ASSERT(result->mean>0.81);
/* clean up */
SG_UNREF(features);
SG_UNREF(labels);
SG_UNREF(cfeats);
SG_UNREF(cker);
SG_UNREF(mkl);
SG_UNREF(cross);
SG_UNREF(result);
}
int main(int argc, char** argv){
shogun::init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
/* performs cross-validation on a multi-class mkl machine */
test_multiclass_mkl_cv();
exit_shogun();
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void test_cross_validation()
{
#ifdef HAVE_LAPACK
/* data matrix dimensions */
index_t num_vectors=100;
index_t num_features=1;
/* training label data */
SGVector<float64_t> lab(num_vectors);
/* fill data matrix and labels */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
SGVector<float64_t>::range_fill_vector(train_dat.matrix, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
/* labels are linear plus noise */
lab.vector[i]=i+CMath::normal_random(0, 1.0);
}
/* training features */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels */
CRegressionLabels* labels=new CRegressionLabels(lab);
/* kernel */
CLinearKernel* kernel=new CLinearKernel();
kernel->init(features, features);
/* kernel ridge regression*/
float64_t tau=0.0001;
CKernelRidgeRegression* krr=new CKernelRidgeRegression(tau, kernel, labels);
/* evaluation criterion */
CMeanSquaredError* eval_crit=
new CMeanSquaredError();
/* train and output */
krr->train(features);
CRegressionLabels* output= CLabelsFactory::to_regression(krr->apply());
for (index_t i=0; i<num_vectors; ++i)
{
SG_SPRINT("x=%f, train=%f, predict=%f\n", train_dat.matrix[i],
labels->get_label(i), output->get_label(i));
}
/* evaluate training error */
float64_t eval_result=eval_crit->evaluate(output, labels);
SG_SPRINT("training error: %f\n", eval_result);
SG_UNREF(output);
/* assert that regression "works". this is not guaranteed to always work
* but should be a really coarse check to see if everything is going
* approx. right */
ASSERT(eval_result<2);
/* splitting strategy */
index_t n_folds=5;
CCrossValidationSplitting* splitting=
new CCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(krr, features, labels,
splitting, eval_crit);
cross->set_num_runs(100);
// cross->set_conf_int_alpha(0.05);
/* actual evaluation */
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("cross_validation estimate:\n");
result->print_result();
/* same crude assertion as for above evaluation */
ASSERT(result->mean<2);
/* clean up */
SG_UNREF(result);
SG_UNREF(cross);
SG_UNREF(features);
#endif /* HAVE_LAPACK */
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>
using namespace shogun;
void test()
{
SGMatrix<float64_t> data(3, 10);
CDenseFeatures<float64_t>* f=new CDenseFeatures<float64_t>(data);
SGVector<float64_t>::range_fill_vector(data.matrix, data.num_cols*data.num_rows, 1.0);
SGMatrix<float64_t>::display_matrix(data.matrix, data.num_rows, data.num_cols,
"original feature data");
index_t offset_subset=1;
SGVector<index_t> feature_subset(8);
SGVector<index_t>::range_fill_vector(feature_subset.vector, feature_subset.vlen,
offset_subset);
SGVector<index_t>::display_vector(feature_subset.vector, feature_subset.vlen,
"feature subset");
f->add_subset(feature_subset);
SG_SPRINT("feature vectors after setting subset on original data:\n");
for (index_t i=0; i<f->get_num_vectors(); ++i)
{
SGVector<float64_t> vec=f->get_feature_vector(i);
SG_SPRINT("%i: ", i);
SGVector<float64_t>::display_vector(vec.vector, vec.vlen);
f->free_feature_vector(vec, i);
}
index_t offset_copy=2;
SGVector<index_t> feature_copy_subset(4);
SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
feature_copy_subset.vlen, offset_copy);
SGVector<index_t>::display_vector(feature_copy_subset.vector, feature_copy_subset.vlen,
"indices that are to be copied");
CDenseFeatures<float64_t>* subset_copy=
(CDenseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);
SGMatrix<float64_t> subset_copy_matrix=subset_copy->get_feature_matrix();
SGMatrix<float64_t>::display_matrix(subset_copy_matrix.matrix,
subset_copy_matrix.num_rows, subset_copy_matrix.num_cols,
"copy matrix");
index_t num_its=subset_copy_matrix.num_rows*subset_copy_matrix.num_cols;
for (index_t i=0; i<num_its; ++i)
{
index_t idx=i+(offset_copy+offset_subset)*subset_copy_matrix.num_rows;
ASSERT(subset_copy_matrix.matrix[i]==data.matrix[idx]);
}
SG_UNREF(f);
SG_UNREF(subset_copy);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>
using namespace shogun;
void test()
{
index_t num_vectors=10;
index_t num_dimensions=7;
index_t num_features=3;
/* create some sparse data */
SGSparseMatrix<float64_t> data=SGSparseMatrix<float64_t>(num_dimensions,
num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
/* put elements only at even indices */
data.sparse_matrix[i]=SGSparseVector<float64_t>(num_features);
/* fill */
for (index_t j=0; j<num_features; ++j)
{
data.sparse_matrix[i].features[j].entry=i+j;
data.sparse_matrix[i].features[j].feat_index=3*j;
}
}
CSparseFeatures<float64_t>* f=new CSparseFeatures<float64_t>(data);
/* display sparse matrix */
SG_SPRINT("original data\n");
for (index_t i=0; i<num_vectors; ++i)
{
SG_SPRINT("sparse vector at %i: [", i);
for (index_t j=0; j<num_features; ++j)
SG_SPRINT("%f, ", data.sparse_matrix[i].features[j].entry);
SG_SPRINT("]\n");
}
/* indices for a subset */
index_t offset_subset=1;
SGVector<index_t> feature_subset(8);
SGVector<index_t>::range_fill_vector(feature_subset.vector, feature_subset.vlen,
offset_subset);
SGVector<index_t>::display_vector(feature_subset.vector, feature_subset.vlen,
"feature subset");
/* set subset and print data */
f->add_subset(feature_subset);
SG_SPRINT("feature vectors after setting subset on original data:\n");
for (index_t i=0; i<f->get_num_vectors(); ++i)
{
SGSparseVector<float64_t> vec=f->get_sparse_feature_vector(i);
SG_SPRINT("sparse vector at %i: ", i);
for (index_t j=0; j<num_features; ++j)
SG_SPRINT("%f, ", vec.features[j].entry);
SG_SPRINT("]\n");
f->free_sparse_feature_vector(i);
}
/* indices that are to copy */
index_t offset_copy=2;
SGVector<index_t> feature_copy_subset(4);
SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
feature_copy_subset.vlen, offset_copy);
SGVector<index_t>::display_vector(feature_copy_subset.vector, feature_copy_subset.vlen,
"indices that are to be copied");
/* copy a subset of features */
CSparseFeatures<float64_t>* subset_copy=
(CSparseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);
/* print copied subset */
SG_SPRINT("copied features:\n");
for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
{
SGSparseVector<float64_t> vec=subset_copy->get_sparse_feature_vector(i);
SG_SPRINT("sparse vector at %i: ", i);
for (index_t j=0; j<num_features; ++j)
SG_SPRINT("%f, ", vec.features[j].entry);
SG_SPRINT("]\n");
subset_copy->free_sparse_feature_vector(i);
}
/* test if all elements are copied correctly */
for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
{
SGSparseVector<float64_t> vec=subset_copy->get_sparse_feature_vector(i);
index_t ind=i+offset_copy+offset_subset+1;
for (index_t j=0; j<vec.num_feat_entries; ++j)
{
float64_t a_entry=vec.features[j].entry;
float64_t b_entry=data.sparse_matrix[ind].features[j].entry;
index_t a_idx=vec.features[j].feat_index;
index_t b_idx=data.sparse_matrix[ind].features[j].feat_index;
ASSERT(a_entry==b_entry);
ASSERT(a_idx==b_idx);
}
subset_copy->free_sparse_feature_vector(i);
}
SG_UNREF(f);
SG_UNREF(subset_copy);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/features/DenseFeatures.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
//sg_io->set_loglevel(MSG_DEBUG);
//sg_io->enable_file_and_line();
// create three 2-dimensional vectors
SGMatrix<float64_t> matrix(2,3);
for (int32_t i=0; i<6; i++)
matrix.matrix[i]=i;
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
ASSERT(features->parameter_hash_changed());
SG_UNREF(features);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
const int32_t num_labels=10;
const int32_t num_classes=3;
void test()
{
const int32_t num_subset_idx=CMath::random(1, num_labels);
/* create labels */
CMulticlassLabels* labels=new CMulticlassLabels(num_labels);
for (index_t i=0; i<num_labels; ++i)
labels->set_label(i, i%num_classes);
SG_REF(labels);
/* print labels */
SGVector<float64_t> labels_data=labels->get_labels();
SGVector<float64_t>::display_vector(labels_data.vector, labels_data.vlen, "labels");
/* create subset indices */
SGVector<index_t> subset_idx(num_subset_idx);
subset_idx.range_fill();
CMath::permute(subset_idx);
/* print subset indices */
SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen, "subset indices");
/* apply subset to features */
SG_SPRINT("\n\n-------------------\n"
"applying subset to features\n"
"-------------------\n");
labels->add_subset(subset_idx);
/* do some stuff do check and output */
ASSERT(labels->get_num_labels()==num_subset_idx);
SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());
for (index_t i=0; i<labels->get_num_labels(); ++i)
{
float64_t label=labels->get_label(i);
SG_SPRINT("label %f:\n", label);
ASSERT(label==labels_data.vector[subset_idx.vector[i]]);
}
/* remove features subset */SG_SPRINT("\n\n-------------------\n"
"removing subset from features\n"
"-------------------\n");
labels->remove_all_subsets();
ASSERT(labels->get_num_labels()==num_labels);
SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());
for (index_t i=0; i<labels->get_num_labels(); ++i)
{
float64_t label=labels->get_label(i);
SG_SPRINT("label %f:\n", label);
ASSERT(label==labels_data.vector[i]);
}
SG_UNREF(labels);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void check_transposed(CDenseFeatures<int32_t>* features)
{
CDenseFeatures<int32_t>* transposed=features->get_transposed();
CDenseFeatures<int32_t>* double_transposed=transposed->get_transposed();
for (index_t i=0; i<features->get_num_vectors(); ++i)
{
SGVector<int32_t> orig_vec=features->get_feature_vector(i);
SGVector<int32_t> new_vec=double_transposed->get_feature_vector(i);
ASSERT(orig_vec.vlen==new_vec.vlen);
for (index_t j=0; j<orig_vec.vlen; j++)
ASSERT(orig_vec.vector[j]==new_vec.vector[j]);
/* not necessary since feature matrix is in memory. for documentation */
features->free_feature_vector(orig_vec,i);
double_transposed->free_feature_vector(new_vec, i);
}
SG_UNREF(transposed);
SG_UNREF(double_transposed);
}
const int32_t num_vectors=6;
const int32_t dim_features=6;
void test()
{
const int32_t num_subset_idx=CMath::random(1, num_vectors);
/* create feature data matrix */
SGMatrix<int32_t> data(dim_features, num_vectors);
/* fill matrix with random data */
for (index_t i=0; i<num_vectors; ++i)
{
for (index_t j=0; j<dim_features; ++j)
data.matrix[i*dim_features+j]=CMath::random(-5, 5);
}
/* create simple features */
CDenseFeatures<int32_t>* features=new CDenseFeatures<int32_t> (data);
SG_REF(features);
/* print feature matrix */
SGMatrix<int32_t>::display_matrix(data.matrix, data.num_rows, data.num_cols,
"feature matrix");
/* create subset indices */
SGVector<index_t> subset_idx(num_subset_idx);
subset_idx.range_fill();
CMath::permute(subset_idx);
/* print subset indices */
SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen, "subset indices");
/* apply subset to features */
SG_SPRINT("\n\n-------------------\n"
"applying subset to features\n"
"-------------------\n");
features->add_subset(subset_idx);
/* do some stuff do check and output */
ASSERT(features->get_num_vectors()==num_subset_idx);
/* check get_Transposed method */
SG_SPRINT("checking transpose...");
check_transposed(features);
SG_SPRINT("does work\n");
SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());
for (index_t i=0; i<features->get_num_vectors(); ++i)
{
SGVector<int32_t> vec=features->get_feature_vector(i);
SG_SPRINT("vector %d: ", i);
SGVector<int32_t>::display_vector(vec.vector, vec.vlen);
for (index_t j=0; j<dim_features; ++j)
ASSERT(vec.vector[j]==data.matrix[subset_idx.vector[i]*num_vectors+j]);
/* not necessary since feature matrix is in memory. for documentation */
features->free_feature_vector(vec, i);
}
/* remove features subset */
SG_SPRINT("\n\n-------------------\n"
"removing subset from features\n"
"-------------------\n");
features->remove_all_subsets();
/* do some stuff do check and output */
ASSERT(features->get_num_vectors()==num_vectors);
SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());
/* check get_Transposed method */
SG_SPRINT("checking transpose...");
check_transposed(features);
SG_SPRINT("does work\n");
for (index_t i=0; i<features->get_num_vectors(); ++i)
{
SGVector<int32_t> vec=features->get_feature_vector(i);
SG_SPRINT("vector %d: ", i);
SGVector<int32_t>::display_vector(vec.vector, vec.vlen);
for (index_t j=0; j<dim_features; ++j)
ASSERT(vec.vector[j]==data.matrix[i*num_vectors+j]);
/* not necessary since feature matrix is in memory. for documentation */
features->free_feature_vector(vec, i);
}
SG_UNREF(features);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/features/SubsetStack.h>
using namespace shogun;
void test()
{
CSubsetStack* stack=new CSubsetStack();
SG_REF(stack);
/* subset indices, each set is shifted by one */
SGVector<index_t> subset_a(10);
SGVector<index_t> subset_b(4);
subset_a.range_fill(1);
subset_b.range_fill(1);
/* add and remove subsets a couple of times */
stack->add_subset(subset_a);
stack->remove_subset();
stack->add_subset(subset_b);
stack->remove_subset();
/* add and remove subsets a couple of times, different order */
stack->add_subset(subset_a);
stack->add_subset(subset_b);
stack->remove_subset();
stack->remove_subset();
/** add two subsets and check if index mapping works */
stack->add_subset(subset_a);
stack->add_subset(subset_b);
/* remember, offset of one for each index set */
for (index_t i=0; i<subset_b.vlen; ++i)
ASSERT(stack->subset_idx_conversion(i)==i+2);
stack->remove_subset();
stack->remove_subset();
/* clean up */
SG_UNREF(stack);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/features/hashed/HashedDenseFeatures.h>
#include <shogun/features/hashed/HashedSparseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/kernel/PolyKernel.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
int32_t num_vectors = 5;
int32_t dim = 20;
SGMatrix<int32_t> mat(dim, num_vectors);
for (index_t v=0; v<num_vectors; v++)
{
for (index_t d=0; d<dim; d++)
mat(d,v) = CMath::random(-dim, dim);
}
int32_t hashing_dim = 12;
CHashedDenseFeatures<int32_t>* h_dense_feats = new CHashedDenseFeatures<int32_t>(mat, hashing_dim);
CSparseFeatures<int32_t>* sparse_feats = new CSparseFeatures<int32_t>(mat);
CHashedSparseFeatures<int32_t>* h_sparse_feats = new CHashedSparseFeatures<int32_t>(sparse_feats, hashing_dim);
SG_REF(h_dense_feats);
CPolyKernel* kernel = new CPolyKernel(h_dense_feats, h_dense_feats, 1, false);
SGMatrix<float64_t> dense_mt = kernel->get_kernel_matrix();
SG_UNREF(kernel);
SG_REF(h_sparse_feats);
kernel = new CPolyKernel(h_sparse_feats, h_sparse_feats, 1, false);
SGMatrix<float64_t> sparse_mt = kernel->get_kernel_matrix();
SG_UNREF(kernel);
for (index_t i=0; i<dense_mt.num_rows; i++)
{
for (index_t j=0; j<dense_mt.num_cols; j++)
ASSERT(dense_mt(i,j)==sparse_mt(i,j))
}
dense_mt.display_matrix("Dense matrix");
sparse_mt.display_matrix("Sparse matrix");
SG_UNREF(h_dense_feats);
SG_UNREF(h_sparse_feats);
exit_shogun();
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2014 Jiaolong Xu
* Copyright (C) 2014 Jiaolong Xu
*/
#include <shogun/io/LibSVMFile.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/base/DynArray.h>
#include <shogun/base/init.h>
using namespace shogun;
#define SHOW_DATA
/* file data */
const char fname_svm_multilabel[] = "../../../../data/multilabel/yeast_test.svm";
void test_libsvmfile_multilabel(const char* fname)
{
FILE* pfile = fopen(fname, "r");
if (pfile == NULL)
{
SG_SPRINT("Unable to open file: %s\n", fname);
return;
}
fclose(pfile);
/* sparse data from matrix*/
CLibSVMFile* svmfile = new CLibSVMFile(fname);
SGSparseVector<float64_t>* feats;
SGVector<float64_t>* labels;
int32_t dim_feat;
int32_t num_samples;
int32_t num_classes;
svmfile->get_sparse_matrix(feats, dim_feat, num_samples, labels, num_classes);
#ifdef SHOW_DATA
// Display the labels
for (int32_t i = 0; i < num_samples; i++)
{
labels[i].display_vector();
}
#endif
SG_SPRINT("Number of the samples: %d\n", num_samples);
SG_SPRINT("Dimention of the feature: %d\n", dim_feat);
SG_SPRINT("Number of classes: %d\n", num_classes);
SG_UNREF(svmfile);
SG_FREE(feats);
SG_FREE(labels);
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test_libsvmfile_multilabel(fname_svm_multilabel);
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/io/LineReader.h>
#include <shogun/lib/DelimiterTokenizer.h>
#include <shogun/lib/SGVector.h>
#include <shogun/io/SGIO.h>
#include <cstdio>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
FILE* fin=fopen("io_linereader.cpp", "r");
CDelimiterTokenizer* tokenizer=new CDelimiterTokenizer();
tokenizer->delimiters['\n']=1;
SG_REF(tokenizer);
CLineReader* reader=new CLineReader(fin, tokenizer);
int lines_count=0;
SGVector<char> tmp_string;
while (reader->has_next())
{
tmp_string=reader->read_line();
SG_SPRINT("%d %d ", lines_count, tmp_string.vlen);
for (int i=0; i<tmp_string.vlen; i++)
SG_SPRINT("%c", tmp_string.vector[i]);
SG_SPRINT("\n");
lines_count++;
}
SG_SPRINT("total lines: %d\n", lines_count);
tmp_string=SGVector<char>();
SG_UNREF(reader);
SG_UNREF(tokenizer);
fclose(fin);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DataGenerator.h>
#include <shogun/features/IndexFeatures.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void test_custom_kernel_subsets()
{
/* create some data */
index_t m=10;
CFeatures* features=
new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data(
m, 2, 1));
SG_REF(features);
/* create a custom kernel */
CKernel* k=new CGaussianKernel();
k->init(features, features);
CCustomKernel* l=new CCustomKernel(k);
/* create a random permutation */
SGVector<index_t> subset(m);
for (index_t run=0; run<100; ++run)
{
subset.range_fill();
CMath::permute(subset);
// subset.display_vector("permutation");
features->add_subset(subset);
k->init(features, features);
l->add_row_subset(subset);
l->add_col_subset(subset);
// k->get_kernel_matrix().display_matrix("K");
// l->get_kernel_matrix().display_matrix("L");
for (index_t i=0; i<m; ++i)
{
for (index_t j=0; j<m; ++j)
{
SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j,
l->kernel(i, j));
ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8);
}
}
features->remove_subset();
l->remove_row_subset();
l->remove_col_subset();
}
SG_UNREF(k);
SG_UNREF(l);
SG_UNREF(features);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
//sg_io->set_loglevel(MSG_DEBUG);
test_custom_kernel_subsets();
exit_shogun();
return 0;
}
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 pl8787
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*/
#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DataGenerator.h>
#include <shogun/features/IndexFeatures.h>
using namespace shogun;
void test_custom_kernel_index_subsets()
{
/* create some data */
index_t m=10;
index_t num_sub_row=3;
index_t num_sub_col=2;
CFeatures* features=
new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data(
m, 2, 1));
SG_REF(features);
/* create a custom kernel */
CGaussianKernel* gaussian_kernel=new CGaussianKernel(2,10);
gaussian_kernel->init(features, features);
CCustomKernel* custom_kernel=new CCustomKernel(gaussian_kernel);
/* create random permutations */
SGVector<index_t> row_subset(num_sub_row);
SGVector<index_t> col_subset(num_sub_col);
row_subset.range_fill();
CMath::permute(row_subset);
col_subset.range_fill();
CMath::permute(col_subset);
/* create index features */
CIndexFeatures* row_idx_feat=new CIndexFeatures(row_subset);
CIndexFeatures* col_idx_feat=new CIndexFeatures(col_subset);
SG_REF(row_idx_feat);
SG_REF(col_idx_feat);
custom_kernel->init(row_idx_feat, col_idx_feat);
SGMatrix<float64_t> gaussian_kernel_matrix=
gaussian_kernel->get_kernel_matrix();
SGMatrix<float64_t> custom_kernel_matrix=
custom_kernel->get_kernel_matrix();
custom_kernel_matrix.display_matrix("subset");
SG_UNREF(gaussian_kernel);
SG_UNREF(custom_kernel);
SG_UNREF(row_idx_feat);
SG_UNREF(col_idx_feat);
SG_UNREF(features);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test_custom_kernel_index_subsets();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DataGenerator.h>
using namespace shogun;
void test_custom_kernel_subsets()
{
/* create some data */
index_t m=10;
CFeatures* features=
new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data(
m, 2, 1));
SG_REF(features);
/* create a custom kernel */
CKernel* k=new CGaussianKernel();
k->init(features, features);
CCustomKernel* l=new CCustomKernel(k);
/* create a random permutation */
SGVector<index_t> subset(m);
for (index_t run=0; run<100; ++run)
{
subset.range_fill();
CMath::permute(subset);
// subset.display_vector("permutation");
features->add_subset(subset);
k->init(features, features);
l->add_row_subset(subset);
l->add_col_subset(subset);
// k->get_kernel_matrix().display_matrix("K");
// l->get_kernel_matrix().display_matrix("L");
for (index_t i=0; i<m; ++i)
{
for (index_t j=0; j<m; ++j)
{
SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j,
l->kernel(i, j));
ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8);
}
}
features->remove_subset();
l->remove_row_subset();
l->remove_col_subset();
}
SG_UNREF(k);
SG_UNREF(l);
SG_UNREF(features);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_custom_kernel_subsets();
exit_shogun();
return 0;
}
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <stdio.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message);
// create some data
SGMatrix<float64_t> matrix(2,3);
for (int32_t i=0; i<6; i++)
matrix.matrix[i]=i;
// create three 2-dimensional vectors
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
// create gaussian kernel with cache 10MB, width 0.5
CGaussianKernel* kernel = new CGaussianKernel(features, features, 0.5, 10);
// print kernel matrix
for (int32_t i=0; i<3; i++)
{
for (int32_t j=0; j<3; j++)
{
SG_SPRINT("%f ", kernel->kernel(i,j));
}
SG_SPRINT("\n");
}
// free up memory
SG_UNREF(kernel);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void test()
{
/* data matrix dimensions */
index_t num_vectors=6;
index_t num_features=2;
/* data means -1, 1 in all components, small std deviation */
SGVector<float64_t> mean_1(num_features);
SGVector<float64_t> mean_2(num_features);
SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -10.0);
SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 10.0);
float64_t sigma=0.5;
SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2");
/* fill data matrix around mean */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
for (index_t j=0; j<num_features; ++j)
{
float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
}
}
SGMatrix<float64_t>::display_matrix(train_dat.matrix, train_dat.num_rows, train_dat.num_cols, "training data");
/* training features */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels +/- 1 for each cluster */
SGVector<float64_t> lab(num_vectors);
for (index_t i=0; i<num_vectors; ++i)
lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;
SGVector<float64_t>::display_vector(lab.vector, lab.vlen, "training labels");
CBinaryLabels* labels=new CBinaryLabels(lab);
SG_REF(labels);
/* evaluation instance */
CContingencyTableEvaluation* eval=new CContingencyTableEvaluation(ACCURACY);
/* kernel */
CKernel* kernel=new CLinearKernel();
kernel->init(features, features);
/* create svm via libsvm */
float64_t svm_C=10;
float64_t svm_eps=0.0001;
CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
svm->set_epsilon(svm_eps);
/* now train a few times on different subsets on data and assert that
* results are correct (data linear separable) */
svm->data_lock(labels, features);
SGVector<index_t> indices(5);
indices.vector[0]=1;
indices.vector[1]=2;
indices.vector[2]=3;
indices.vector[3]=4;
indices.vector[4]=5;
SGVector<index_t>::display_vector(indices.vector, indices.vlen, "training indices");
svm->train_locked(indices);
CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply());
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels(), "apply() output");
SGVector<float64_t>::display_vector(labels->get_labels().vector, labels->get_labels().vlen, "training labels");
SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
ASSERT(eval->evaluate(output, labels)==1);
SG_UNREF(output);
SG_SPRINT("\n\n");
indices=SGVector<index_t>(3);
indices.vector[0]=1;
indices.vector[1]=2;
indices.vector[2]=3;
SGVector<index_t>::display_vector(indices.vector, indices.vlen, "training indices");
output=CLabelsFactory::to_binary(svm->apply());
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels(), "apply() output");
SGVector<float64_t>::display_vector(labels->get_labels().vector, labels->get_labels().vlen, "training labels");
SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
ASSERT(eval->evaluate(output, labels)==1);
SG_UNREF(output);
SG_SPRINT("\n\n");
indices=SGVector<index_t>(4);
indices.range_fill();
SGVector<index_t>::display_vector(indices.vector, indices.vlen, "training indices");
svm->train_locked(indices);
output=CLabelsFactory::to_binary(svm->apply());
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels(), "apply() output");
SGVector<float64_t>::display_vector(labels->get_labels().vector, labels->get_labels().vlen, "training labels");
SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
ASSERT(eval->evaluate(output, labels)==1);
SG_UNREF(output);
SG_SPRINT("normal train\n");
svm->data_unlock();
svm->train();
output=CLabelsFactory::to_binary(svm->apply());
ASSERT(eval->evaluate(output, labels)==1);
SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels(), "output");
SGVector<float64_t>::display_vector(labels->get_labels().vector, labels->get_labels().vlen, "training labels");
SG_UNREF(output);
/* clean up */
SG_UNREF(svm);
SG_UNREF(features);
SG_UNREF(eval);
SG_UNREF(labels);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test();
exit_shogun();
return 0;
}
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/DotKernel.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <stdio.h>
using namespace shogun;
class CReverseLinearKernel : public CDotKernel
{
public:
/** default constructor */
CReverseLinearKernel() : CDotKernel(0)
{
}
/** destructor */
virtual ~CReverseLinearKernel()
{
}
/** initialize kernel
*
* @param l features of left-hand side
* @param r features of right-hand side
* @return if initializing was successful
*/
virtual bool init(CFeatures* l, CFeatures* r)
{
CDotKernel::init(l, r);
return init_normalizer();
}
/** load kernel init_data
*
* @param src file to load from
* @return if loading was successful
*/
virtual bool load_init(FILE* src)
{
return false;
}
/** save kernel init_data
*
* @param dest file to save to
* @return if saving was successful
*/
virtual bool save_init(FILE* dest)
{
return false;
}
/** return what type of kernel we are
*
* @return kernel type UNKNOWN (as it is not part
* officially part of shogun)
*/
virtual EKernelType get_kernel_type()
{
return K_UNKNOWN;
}
/** return the kernel's name
*
* @return name "Reverse Linear"
*/
inline virtual const char* get_name() const
{
return "ReverseLinear";
}
protected:
/** compute kernel function for features a and b
* idx_{a,b} denote the index of the feature vectors
* in the corresponding feature object
*
* @param idx_a index a
* @param idx_b index b
* @return computed kernel function at indices a,b
*/
virtual float64_t compute(int32_t idx_a, int32_t idx_b)
{
int32_t alen, blen;
bool afree, bfree;
float64_t* avec=
((CDenseFeatures<float64_t>*) lhs)->get_feature_vector(idx_a, alen, afree);
float64_t* bvec=
((CDenseFeatures<float64_t>*) rhs)->get_feature_vector(idx_b, blen, bfree);
ASSERT(alen==blen);
float64_t result=0;
for (int32_t i=0; i<alen; i++)
result+=avec[i]*bvec[alen-i-1];
((CDenseFeatures<float64_t>*) lhs)->free_feature_vector(avec, idx_a, afree);
((CDenseFeatures<float64_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree);
return result;
}
};
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message);
// create some data
SGMatrix<float64_t> matrix(2,3);
for (int32_t i=0; i<6; i++)
matrix.matrix[i]=i;
// create three 2-dimensional vectors
// shogun will now own the matrix created
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>();
features->set_feature_matrix(matrix);
// create reverse linear kernel
CReverseLinearKernel* kernel = new CReverseLinearKernel();
kernel->init(features,features);
// print kernel matrix
for (int32_t i=0; i<3; i++)
{
for (int32_t j=0; j<3; j++)
SG_SPRINT("%f ", kernel->kernel(i,j));
SG_SPRINT("\n");
}
// free up memory
SG_UNREF(kernel);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/labels/BinaryLabels.h>
using namespace shogun;
void test_sigmoid_fitting()
{
CBinaryLabels* labels=new CBinaryLabels(10);
labels->set_values(SGVector<float64_t>(labels->get_num_labels()));
for (index_t i=0; i<labels->get_num_labels(); ++i)
labels->set_value(i%2==0 ? 1 : -1, i);
labels->get_values().display_vector("scores");
labels->scores_to_probabilities();
labels->get_values().display_vector("probabilities");
SG_UNREF(labels);
}
int main()
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_sigmoid_fitting();
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/CircularBuffer.h>
#include <shogun/lib/DelimiterTokenizer.h>
#include <shogun/lib/SGVector.h>
#include <shogun/io/SGIO.h>
#include <cstdio>
#include <cstring>
using namespace shogun;
const int max_line_length = 256;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
SGVector<char> test_string(const_cast<char* >("all your bayes are belong to us! "), 33, false);
CCircularBuffer* buffer=new CCircularBuffer(max_line_length);
CDelimiterTokenizer* tokenizer=new CDelimiterTokenizer();
tokenizer->delimiters[' ']=1;
SG_REF(tokenizer);
buffer->set_tokenizer(tokenizer);
SGVector<char> tmp_string;
buffer->push(test_string);
int num_read;
index_t start;
while ((num_read=buffer->next_token_idx(start))>0)
{
buffer->skip_characters(start);
tmp_string=buffer->pop(num_read);
buffer->skip_characters(1);
for (int i=0; i<tmp_string.vlen; i++)
SG_SPRINT("%c", tmp_string.vector[i]);
SG_SPRINT("\n");
}
SG_UNREF(buffer);
SG_UNREF(tokenizer);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2009 Soeren Sonnenburg
* Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Time.h>
#include <shogun/lib/ShogunException.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/DynInt.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void print_warning(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void print_error(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void gen_ints(uint256_t* &a, uint32_t* &b, uint32_t len)
{
a=SG_MALLOC(uint256_t, len);
b=SG_MALLOC(uint32_t, len);
CMath::init_random(17);
for (uint32_t i=0; i<len; i++)
{
uint64_t r[4]={(uint64_t) CMath::random() << 32 | CMath::random(),
(uint64_t) CMath::random() << 32 | CMath::random(),
(uint64_t) CMath::random() << 32 | CMath::random(),
(uint64_t) CMath::random() << 32 | CMath::random()};
a[len-i-1]=r;
b[len-i-1]=i;
}
}
const int LEN = 5*1024;
int main()
{
init_shogun(&print_message, &print_warning,
&print_error);
try
{
uint256_t* a;
uint32_t* b;
CTime t;
t.io->set_loglevel(MSG_DEBUG);
SG_SPRINT("gen data..");
t.start();
gen_ints(a,b, LEN);
t.cur_time_diff(true);
SG_SPRINT("qsort..");
t.start();
CMath::qsort_index(a, b, LEN);
t.cur_time_diff(true);
SG_SPRINT("\n\n");
for (uint32_t i=0; i<10; i++)
{
SG_SPRINT("a[%d]=", i);
a[i].print_hex();
SG_SPRINT("\n");
}
SG_SPRINT("\n\n");
uint64_t val1[4]={1,2,3,4};
uint64_t val2[4]={5,6,7,8};
a[0]=val1;
a[1]=val2;
a[2]=a[0];
CMath::swap(a[0],a[1]);
printf("a[0]==a[1] %d\n", (int) (a[0] == a[1]));
printf("a[0]<a[1] %d\n", (int) (a[0] < a[1]));
printf("a[0]<=a[1] %d\n", (int) (a[0] <= a[1]));
printf("a[0]>a[1] %d\n", (int) (a[0] > a[1]));
printf("a[0]>=a[1] %d\n", (int) (a[0] >= a[1]));
printf("a[0]==a[0] %d\n", (int) (a[0] == a[0]));
printf("a[0]<a[0] %d\n", (int) (a[0] < a[0]));
printf("a[0]<=a[0] %d\n", (int) (a[0] <= a[0]));
printf("a[0]>a[0] %d\n", (int) (a[0] > a[0]));
printf("a[0]>=a[0] %d\n", (int) (a[0] >= a[0]));
SG_SPRINT("\n\n");
for (uint32_t i=0; i<10 ; i++)
{
SG_SPRINT("a[%d]=", i);
a[i].print_hex();
printf("\n");
}
SG_FREE(a);
SG_FREE(b);
}
catch(ShogunException & sh)
{
SG_SPRINT("%s",sh.get_exception_string());
}
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2009 Soeren Sonnenburg
* Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/lib/SGVector.h>
#include <shogun/base/DynArray.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
DynArray<int32_t> values;
for (int32_t i=0; i<1000; i++)
{
values.set_element(i,i);
}
for (int32_t i=0; i<1000; i++)
{
SG_SPRINT("values[%i]=%i\n", i, values[i]);
}
DynArray<SGVector<float64_t> > vectors(5);
for (int32_t i=0; i<20; i++)
{
SG_SPRINT("%i\n", i);
SGVector<float64_t> vec(i);
for (int32_t j=0; j<i; j++)
vec.vector[j]=j;
vectors.set_element(vec,i);
}
for (int32_t i=0; i<20; i++)
{
SG_SPRINT("%i\n", i);
vectors[i].display_vector();
}
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/GCArray.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/kernel/GaussianKernel.h>
#include <stdio.h>
using namespace shogun;
const int l=10;
int main(int argc, char** argv)
{
init_shogun();
// we need this scope, because exit_shogun() must not be called
// before the destructor of CGCArray<CKernel*> kernels!
{
// create array of kernels
CGCArray<CKernel*> kernels(l);
// fill array with kernels
for (int i=0; i<l; i++)
kernels.set(new CGaussianKernel(10, 1.0), i);
// print kernels
for (int i=0; i<l; i++)
{
CKernel* kernel = kernels.get(i);
printf("kernels[%d]=%p\n", i, kernel);
SG_UNREF(kernel);
}
}
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/Hash.h>
#include <stdio.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun();
uint8_t array[4]={0,1,2,3};
printf("hash(0)=%0x\n", CHash::MurmurHash3(&array[0], 1, 0xDEADBEAF));
printf("hash(1)=%0x\n", CHash::MurmurHash3(&array[1], 1, 0xDEADBEAF));
printf("hash(2)=%0x\n", CHash::MurmurHash3(&array[0], 2, 0xDEADBEAF));
printf("hash(3)=%0x\n", CHash::MurmurHash3(&array[0], 4, 0xDEADBEAF));
uint32_t h = 0xDEADBEAF;
uint32_t carry = 0;
CHash::IncrementalMurmurHash3(&h, &carry, &array[0], 1);
printf("inc_hash(0)=%0x\n", h);
CHash::IncrementalMurmurHash3(&h, &carry, &array[1], 1);
printf("inc_hash(1)=%0x\n", h);
CHash::IncrementalMurmurHash3(&h, &carry, &array[2], 1);
printf("inc_hash(2)=%0x\n", h);
CHash::IncrementalMurmurHash3(&h, &carry, &array[3], 1);
printf("inc_hash(3)=%0x\n", h);
h = CHash::FinalizeIncrementalMurmurHash3(h, carry, 4);
printf("Final inc_hash(3)=%0x\n", h);
exit_shogun();
return 0;
}
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/io/HDF5File.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
#ifdef HAVE_HDF5
CHDF5File* hdf = new CHDF5File((char*) "../data/australian.libsvm.h5",'r', "/data/data");
float64_t* mat;
int32_t num_feat;
int32_t num_vec;
hdf->get_matrix(mat, num_feat, num_vec);
SGMatrix<float64_t>::display_matrix(mat, num_feat, num_vec);
SG_FREE(mat);
SG_UNREF(hdf);
#endif
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/memory.h>
#include <shogun/lib/IndirectObject.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/SGObject.h>
#include <stdio.h>
using namespace shogun;
const int l=10;
int main(int argc, char** argv)
{
init_shogun();
// create array a
int32_t* a=SG_MALLOC(int32_t, l);
for (int i=0; i<l; i++)
a[i]=l-i;
typedef CIndirectObject<int32_t, int32_t**> INDIRECT;
// create array of indirect objects pointing to array a
INDIRECT::set_array(&a);
INDIRECT* x = SG_MALLOC(INDIRECT, l);
INDIRECT::init_slice(x, l);
printf("created array a and indirect object array x pointing to a.\n\n");
for (int i=0; i<l; i++)
printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i]));
//sort the array
CMath::qsort(x, l);
printf("\n\nvoila! sorted indirect object array x, keeping a const.\n\n");
for (int i=0; i<l; i++)
printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i]));
SG_FREE(x);
SG_FREE(a);
exit_shogun();
return 0;
}
#include <shogun/lib/Map.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
using namespace shogun;
#define SIZE 6
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message, &print_message, &print_message);
const char* v[SIZE] = {"Russia", "England", "Germany", "USA", "France", "Spain"};
CMap<int32_t, const char*>* map = new CMap<int32_t, const char*>(SIZE/2, SIZE/2);
for (int i=0; i<SIZE; i++)
map->add(i, v[i]);
map->remove(0);
//SG_SPRINT("Num of elements: %d\n", map->get_num_elements());
for (int i=0; i<SIZE; i++)
{
if (map->contains(i))
;
//SG_SPRINT("key %d contains in map with index %d and data=%s\n",
// i, map->index_of(i), map->get_element(i));
}
SG_UNREF(map);
exit_shogun();
return 0;
}
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/io/MLDataHDF5File.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
#if defined(HAVE_HDF5) && defined( HAVE_CURL)
CMLDataHDF5File* hdf = NULL;
try
{
hdf = new CMLDataHDF5File((char *)"australian", "/data/data");
}
catch (ShogunException& e)
{
SG_UNREF(hdf);
exit_shogun();
return 0;
}
float64_t* mat=NULL;
int32_t num_feat;
int32_t num_vec;
try
{
hdf->get_matrix(mat, num_feat, num_vec);
SGMatrix<float64_t>::display_matrix(mat, num_feat, num_vec);
}
catch (ShogunException& e)
{
SG_SWARNING("%s", e.get_exception_string());
}
SG_FREE(mat);
SG_UNREF(hdf);
#endif // HAVE_CURL && HAVE_HDF5
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/features/SparseFeatures.h>
using namespace shogun;
int main(int argc, char** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
/* create feature data matrix */
SGMatrix<int32_t> data(3, 20);
/* fill matrix with random data */
for (index_t i=0; i<20*3; ++i)
{
if (i%2==0)
data.matrix[i]=0;
else
data.matrix[i]=CMath::random(1, 9);
}
/* create sparse features */
CSparseFeatures<int32_t>* sparse_features=new CSparseFeatures<int32_t>(data);
CSerializableAsciiFile* file;
file=new CSerializableAsciiFile("sparseFeatures.txt", 'w');
sparse_features->save_serializable(file);
file->close();
SG_UNREF(file);
/* this will fail with a warning, same with CSerializableHdf5File and xml serialization*/
CSparseFeatures<int32_t>* sparse_features_loaded = new CSparseFeatures<int32_t>();
file = new CSerializableAsciiFile("sparseFeatures.txt", 'r');
sparse_features_loaded->load_serializable(file);
SG_UNREF(file);
SG_UNREF(sparse_features_loaded);
SG_UNREF(sparse_features);
exit_shogun();
}
#include <shogun/lib/Set.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
using namespace shogun;
#define SIZE 8
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message, &print_message, &print_message);
double v[SIZE] = {0.0,0.1,0.2,0.2,0.3,0.4,0.5,0.5};
CSet<double>* set = new CSet<double>(SIZE/2, SIZE/2);
for (int i=0; i<SIZE; i++)
set->add(v[i]);
set->remove(0.2);
//SG_SPRINT("Num of elements: %d\n", set->get_num_elements());
for (int i=0; i<SIZE; i++)
{
if (set->contains(v[i]))
;
//SG_SPRINT("%lg contains in set with index %d\n", v[i], set->index_of(v[i]));
}
SG_UNREF(set);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/SGVector.h>
using namespace shogun;
void test()
{
/*
SGVector<float64_t> data(10);
SGVector<float64_t>::range_fill_vector(data.vector, data.vlen, 1.0);
float64_t low, up, mean;
float64_t error_prob=0.05;
mean=CStatistics::confidence_intervals_mean(data, error_prob, low, up);
SG_SPRINT("sample mean: %f. True mean lies in [%f,%f] with %f%%\n",
mean, low, up, 100*(1-error_prob));
*/
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Sergey Lisitsyn
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/lapack.h>
using namespace shogun;
#ifdef HAVE_LAPACK
bool is_equal(float64_t a, float64_t b, float64_t eps)
{
return CMath::abs(a-b)<=eps;
}
void test_ev()
{
SGMatrix<float64_t> A(3,3);
A(0,0)=0;
A(0,1)=1;
A(0,2)=0;
A(1,0)=1;
A(1,1)=0;
A(1,2)=1;
A(1,0)=0;
A(2,1)=1;
A(2,2)=0;
SGVector<float64_t> ev=SGMatrix<float64_t>::compute_eigenvectors(A);
SGMatrix<float64_t>::display_matrix(A.matrix, A.num_rows, A.num_cols, "A");
SGVector<float64_t>::display_vector(ev.vector, ev.vlen, "eigenvalues");
float64_t sqrt22=CMath::sqrt(2.0)/2.0;
float64_t eps=10E-16;
/* check for correct eigenvectors */
ASSERT(is_equal(A(0,0), 0.5, eps));
ASSERT(is_equal(A(0,1), -sqrt22, eps));
ASSERT(is_equal(A(0,2), 0.5, eps));
ASSERT(is_equal(A(1,0), -sqrt22, eps));
ASSERT(is_equal(A(1,1), 0, eps));
ASSERT(is_equal(A(1,2), sqrt22, eps));
ASSERT(is_equal(A(2,0), 0.5, eps));
ASSERT(is_equal(A(2,1), sqrt22, eps));
ASSERT(is_equal(A(2,2), 0.5, eps));
/* check for correct eigenvalues */
ASSERT(is_equal(ev[0], -sqrt22*2, eps));
ASSERT(is_equal(ev[1], 0, eps));
ASSERT(is_equal(ev[2], sqrt22*2, eps));
}
void test_matrix_multiply()
{
index_t n=10;
SGMatrix<float64_t> I=SGMatrix<float64_t>::create_identity_matrix(n,1.0);
index_t m=4;
SGMatrix<float64_t> A(n, m);
SGVector<float64_t>::range_fill_vector(A.matrix, m*n);
SGMatrix<float64_t>::display_matrix(I, "I");
SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
SGMatrix<float64_t>::display_matrix(A, "A transposed");
SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
SGMatrix<float64_t>::display_matrix(A, "A");
SG_SPRINT("multiply A by I and check result\n");
SGMatrix<float64_t> A2=SGMatrix<float64_t>::matrix_multiply(I, A);
ASSERT(A2.num_rows==A.num_rows);
ASSERT(A2.num_cols==A.num_cols);
SGMatrix<float64_t>::display_matrix(A2);
for (index_t i=0; i<A2.num_rows; ++i)
{
for (index_t j=0; j<A2.num_cols; ++j)
ASSERT(A(i,j)==A2(i,j));
}
SG_SPRINT("multiply A by transposed I and check result\n");
SGMatrix<float64_t> A3=SGMatrix<float64_t>::matrix_multiply(I, A, true);
ASSERT(A3.num_rows==I.num_rows);
ASSERT(A3.num_cols==A.num_cols);
SGMatrix<float64_t>::display_matrix(A3);
for (index_t i=0; i<A2.num_rows; ++i)
{
for (index_t j=0; j<A2.num_cols; ++j)
ASSERT(A(i,j)==A3(i,j));
}
SG_SPRINT("multiply transposed A by I and check result\n");
SGMatrix<float64_t> A4=SGMatrix<float64_t>::matrix_multiply(A, I, true, false);
ASSERT(A4.num_rows==A.num_cols);
ASSERT(A4.num_cols==I.num_cols);
SGMatrix<float64_t>::display_matrix(A4);
for (index_t i=0; i<A.num_rows; ++i)
{
for (index_t j=0; j<A.num_cols; ++j)
ASSERT(A(i,j)==A4(j,i));
}
SG_SPRINT("multiply A by scaled I and check result\n");
SGMatrix<float64_t> A5=SGMatrix<float64_t>::matrix_multiply(I, A, false, false, n);
ASSERT(A5.num_rows==I.num_rows);
ASSERT(A5.num_cols==A.num_cols);
SGMatrix<float64_t>::display_matrix(A5);
for (index_t i=0; i<A2.num_rows; ++i)
{
for (index_t j=0; j<A2.num_cols; ++j)
ASSERT(n*A(i,j)==A5(i,j));
}
}
void test_lapack()
{
// size of square matrix
int N = 100;
// square matrix
double* double_matrix = new double[N*N];
// for storing eigenpairs
double* double_eigenvalues = new double[N];
double* double_eigenvectors = new double[N*N];
// for SVD
double* double_U = new double[N*N];
double* double_s = new double[N];
double* double_Vt = new double[N*N];
// status (should be zero)
int status;
// DSYGVX
for (int i=0; i<N; i++)
{
for (int j=0; j<N; j++)
double_matrix[i*N+j] = ((double)(i-j))/(i+j+1);
double_matrix[i*N+i] += 100;
}
status = 0;
wrap_dsygvx(1,'V','U',N,double_matrix,N,double_matrix,N,1,3,double_eigenvalues,double_eigenvectors,&status);
if (status!=0)
SG_SERROR("DSYGVX/SSYGVX failed with code %d\n",status);
delete[] double_eigenvectors;
// DGEQRF+DORGQR
status = 0;
double* double_tau = new double[N];
wrap_dgeqrf(N,N,double_matrix,N,double_tau,&status);
wrap_dorgqr(N,N,N,double_matrix,N,double_tau,&status);
if (status!=0)
SG_SERROR("DGEQRF/DORGQR failed with code %d\n",status);
delete[] double_tau;
// DGESVD
for (int i=0; i<N; i++)
{
for (int j=0; j<N; j++)
double_matrix[i*N+j] = i*i+j*j;
}
status = 0;
wrap_dgesvd('A','A',N,N,double_matrix,N,double_s,double_U,N,double_Vt,N,&status);
if (status!=0)
SG_SERROR("DGESVD failed with code %d\n",status);
delete[] double_s;
delete[] double_U;
delete[] double_Vt;
// DSYEV
status = 0;
wrap_dsyev('V','U',N,double_matrix,N,double_eigenvalues,&status);
if (status!=0)
SG_SERROR("DSYEV failed with code %d\n",status);
delete[] double_eigenvalues;
delete[] double_matrix;
}
#endif // HAVE_LAPACK
int main(int argc, char** argv)
{
init_shogun_with_defaults();
#ifdef HAVE_LAPACK
SG_SPRINT("checking lapack\n");
test_lapack();
SG_SPRINT("compute_eigenvectors\n");
test_ev();
SG_SPRINT("matrix_multiply\n");
test_matrix_multiply();
#endif // HAVE_LAPACK
exit_shogun();
return 0;
}
#include <shogun/metric/LMNN.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
// create features, each column is a feature vector
SGMatrix<float64_t> feat_mat(2,4);
// 1st feature vector
feat_mat(0,0)=0;
feat_mat(1,0)=0;
// 2nd feature vector
feat_mat(0,1)=0;
feat_mat(1,1)=-1;
// 3rd feature vector
feat_mat(0,2)=1;
feat_mat(1,2)=1;
// 4th feature vector
feat_mat(0,3)=-1;
feat_mat(1,3)=1;
// wrap feat_mat into Shogun features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat_mat);
// create labels
SGVector<float64_t> lab_vec(4);
lab_vec[0]=0;
lab_vec[1]=0;
lab_vec[2]=1;
lab_vec[3]=1;
// two-class data, use MulticlassLabels because LMNN works in general for more than two classes
CMulticlassLabels* labels=new CMulticlassLabels(lab_vec);
// create LMNN metric machine
int32_t k=1; // number of target neighbors per example
CLMNN* lmnn=new CLMNN(features,labels,k);
// use the identity matrix as initial transform for LMNN
SGMatrix<float64_t> init_transform=SGMatrix<float64_t>::create_identity_matrix(2,1);
// set number of maximum iterations and train
lmnn->set_maxiter(1500);
// lmnn->io->set_loglevel(MSG_DEBUG);
lmnn->train(init_transform);
// lmnn->get_linear_transform().display_matrix("linear_transform");
CLMNNStatistics* statistics=lmnn->get_statistics();
/*
statistics->obj.display_vector("objective");
statistics->stepsize.display_vector("stepsize");
statistics->num_impostors.display_vector("num_impostors");
*/
SG_UNREF(statistics);
SG_UNREF(lmnn);
exit_shogun();
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c=new CModelSelectionParameters("C1");
root->append_child(c);
c->build_values(1.0, 2.0, R_EXP);
CGaussianKernel* gaussian_kernel=new CGaussianKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
gaussian_kernel->print_modsel_params();
CModelSelectionParameters* param_gaussian_kernel=
new CModelSelectionParameters("kernel", gaussian_kernel);
root->append_child(param_gaussian_kernel);
CModelSelectionParameters* param_gaussian_kernel_width=
new CModelSelectionParameters("log_width");
param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR);
param_gaussian_kernel->append_child(param_gaussian_kernel_width);
return root;
}
void apply_parameter_tree(CDynamicObjectArray* combinations)
{
/* create some data */
SGMatrix<float64_t> matrix(2,3);
for (index_t i=0; i<6; i++)
matrix.matrix[i]=i;
/* create three 2-dimensional vectors
* to avoid deleting these, REF now and UNREF when finished */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
SG_REF(features);
/* create three labels, will be handed to svm and automaticall deleted */
CBinaryLabels* labels=new CBinaryLabels(3);
SG_REF(labels);
labels->set_label(0, -1);
labels->set_label(1, +1);
labels->set_label(2, -1);
/* create libsvm with C=10 and train */
CLibSVM* svm=new CLibSVM();
SG_REF(svm);
svm->set_labels(labels);
for (index_t i=0; i<combinations->get_num_elements(); ++i)
{
SG_SPRINT("applying:\n");
CParameterCombination* current_combination=(CParameterCombination*)
combinations->get_element(i);
current_combination->print_tree();
Parameter* current_parameters=svm->m_parameters;
current_combination->apply_to_modsel_parameter(current_parameters);
SG_UNREF(current_combination);
/* get kernel to set features, get_kernel SG_REF's the kernel */
CKernel* kernel=svm->get_kernel();
kernel->init(features, features);
svm->train();
/* classify on training examples */
for (index_t j=0; j<3; j++)
SG_SPRINT("output[%d]=%f\n", j, svm->apply_one(j));
/* unset features and SG_UNREF kernel */
kernel->cleanup();
SG_UNREF(kernel);
SG_SPRINT("----------------\n\n");
}
/* free up memory */
SG_UNREF(features);
SG_UNREF(labels);
SG_UNREF(svm);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
/* create example tree */
CModelSelectionParameters* tree=create_param_tree();
tree->print_tree();
SG_SPRINT("----------------------------------\n");
/* build combinations of parameter trees */
CDynamicObjectArray* combinations=tree->get_combinations();
apply_parameter_tree(combinations);
/* print and directly delete them all */
for (index_t i=0; i<combinations->get_num_elements(); ++i)
{
CParameterCombination* combination=(CParameterCombination*)
combinations->get_element(i);
SG_UNREF(combination);
}
SG_UNREF(combinations);
/* delete example tree (after processing of combinations because CSGObject
* (namely the kernel) of the tree is SG_UNREF'ed (and not REF'ed anywhere
* else) */
SG_UNREF(tree);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PolyKernel.h>
#include <shogun/kernel/CombinedKernel.h>
using namespace shogun;
/** Creates a bunch of combined kernels with different sub-parameters.
* This can be used for modelselection of subkernel parameters of combined
* kernels
*/
CModelSelectionParameters* build_combined_kernel_parameter_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
/* kernel a should be Gaussian with certain parameters
* kernel b should be polynomial with certain parameters
* This will create a list of combined kernels with all parameter combinations
* All CList instances here do reference counting (also the combine_kernels
* method of CCombinedKernel
*/
CList* kernels_a=new CList(true);
CList* kernels_b=new CList(true);
int32_t cache_size=10;
kernels_a->append_element(new CGaussianKernel(cache_size, 2));
kernels_a->append_element(new CGaussianKernel(cache_size, 4));
kernels_b->append_element(new CPolyKernel(cache_size, 4));
kernels_b->append_element(new CPolyKernel(cache_size, 2));
CList* kernel_list=new CList();
kernel_list->append_element(kernels_a);
kernel_list->append_element(kernels_b);
CList* combinations=CCombinedKernel::combine_kernels(kernel_list);
/* add all created combined kernels to parameters tree */
/* cast is safe since the above method guarantees the type */
CCombinedKernel* current=(CCombinedKernel*)(combinations->get_first_element());
SG_SPRINT("combined kernel combinations:\n");
index_t i=0;
while (current)
{
/* print out current kernel's subkernels */
SG_SPRINT("combined kernel %d:\n", i++);
CGaussianKernel* gaussian=(CGaussianKernel*)current->get_kernel(0);
CPolyKernel* poly=(CPolyKernel*)current->get_kernel(1);
SG_SPRINT("kernel_a type: %s\n", poly->get_name());
SG_SPRINT("kernel_b type: %s\n", gaussian->get_name());
SG_SPRINT("kernel_a parameter: %d\n", poly->get_degree());
SG_SPRINT("kernel_b parameter: %f\n", gaussian->get_width());
SG_UNREF(poly);
SG_UNREF(gaussian);
CModelSelectionParameters* param_kernel=
new CModelSelectionParameters("kernel", current);
root->append_child(param_kernel);
SG_UNREF(current);
current=(CCombinedKernel*)(combinations->get_next_element());
}
SG_UNREF(combinations);
SG_UNREF(kernel_list);
SG_UNREF(kernels_a);
SG_UNREF(kernels_b);
return root;
}
void modelselection_combined_kernel()
{
int32_t num_subsets=3;
int32_t num_vectors=20;
int32_t dim_vectors=3;
/* create some data and labels */
SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
CBinaryLabels* labels=new CBinaryLabels(num_vectors);
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
matrix.matrix[i]=CMath::randn_double();
/* create num_feautres 2-dimensional vectors */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
/* create combined features */
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);
/* create svm */
CMKL* classifier=new CMKLClassification(new CLibSVM());
classifier->set_interleaved_optimization_enabled(false);
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, comb_features,
labels, splitting_strategy,
evaluation_criterium);
cross->set_num_runs(1);
/* TODO: remove this once locking is fixed for combined kernels */
cross->set_autolock(false);
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=build_combined_kernel_parameter_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
bool print_state=true;
CParameterCombination* best_combination=grid_search->select_model(
print_state);
best_combination->print_tree();
best_combination->apply_to_machine(classifier);
/* print subkernel parameters, I know what the subkernel types are here */
CCombinedKernel* kernel=(CCombinedKernel*)classifier->get_kernel();
CGaussianKernel* gaussian=(CGaussianKernel*)kernel->get_kernel(0);
CPolyKernel* poly=(CPolyKernel*)kernel->get_kernel(1);
SG_SPRINT("gaussian width: %f\n", gaussian->get_width());
SG_SPRINT("poly degree: %d\n", poly->get_degree());
SG_UNREF(kernel);
SG_UNREF(gaussian);
SG_UNREF(poly);
/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.01);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("result: ");
result->print_result();
/* clean up destroy result parameter */
SG_UNREF(result);
SG_UNREF(best_combination);
SG_UNREF(grid_search);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
modelselection_combined_kernel();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
root->append_child(c1);
c1->build_values(-1.0, 1.0, R_EXP);
CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
root->append_child(c2);
c2->build_values(-1.0, 1.0, R_EXP);
CGaussianKernel* gaussian_kernel=new CGaussianKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
gaussian_kernel->print_modsel_params();
CModelSelectionParameters* param_gaussian_kernel=
new CModelSelectionParameters("kernel", gaussian_kernel);
CModelSelectionParameters* gaussian_kernel_width=
new CModelSelectionParameters("log_width");
gaussian_kernel_width->build_values(-CMath::log(2.0), 0.0, R_LINEAR, 1.0);
param_gaussian_kernel->append_child(gaussian_kernel_width);
root->append_child(param_gaussian_kernel);
CPowerKernel* power_kernel=new CPowerKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
power_kernel->print_modsel_params();
CModelSelectionParameters* param_power_kernel=
new CModelSelectionParameters("kernel", power_kernel);
root->append_child(param_power_kernel);
CModelSelectionParameters* param_power_kernel_degree=
new CModelSelectionParameters("degree");
param_power_kernel_degree->build_values(1.0, 2.0, R_LINEAR);
param_power_kernel->append_child(param_power_kernel_degree);
CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
m_metric->print_modsel_params();
CModelSelectionParameters* param_power_kernel_metric1=
new CModelSelectionParameters("distance", m_metric);
param_power_kernel->append_child(param_power_kernel_metric1);
CModelSelectionParameters* param_power_kernel_metric1_k=
new CModelSelectionParameters("k");
param_power_kernel_metric1_k->build_values(1.0, 2.0, R_LINEAR);
param_power_kernel_metric1->append_child(param_power_kernel_metric1_k);
return root;
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
int32_t num_subsets=3;
int32_t num_vectors=20;
int32_t dim_vectors=3;
/* create some data and labels */
SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
CBinaryLabels* labels=new CBinaryLabels(num_vectors);
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
matrix.matrix[i]=CMath::randn_double();
/* create num_feautres 2-dimensional vectors */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);
/* create svm */
CLibSVM* classifier=new CLibSVM();
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
splitting_strategy, evaluation_criterium);
cross->set_num_runs(1);
/* note that this automatically is not necessary since done automatically */
cross->set_autolock(true);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
classifier->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
bool print_state=true;
CParameterCombination* best_combination=grid_search->select_model(
print_state);
best_combination->print_tree();
best_combination->apply_to_machine(classifier);
/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.01);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("result: ");
result->print_result();
/* now again but unlocked */
SG_UNREF(best_combination);
cross->set_autolock(true);
best_combination=grid_search->select_model(print_state);
best_combination->apply_to_machine(classifier);
SG_UNREF(result);
result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("result (unlocked): ");
/* clean up destroy result parameter */
SG_UNREF(result);
SG_UNREF(best_combination);
SG_UNREF(grid_search);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PolyKernel.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ParameterCombination.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* tau=new CModelSelectionParameters("tau");
root->append_child(tau);
tau->build_values(-1.0, 1.0, R_EXP);
CGaussianKernel* gaussian_kernel=new CGaussianKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
gaussian_kernel->print_modsel_params();
CModelSelectionParameters* param_gaussian_kernel=
new CModelSelectionParameters("kernel", gaussian_kernel);
CModelSelectionParameters* gaussian_kernel_width=
new CModelSelectionParameters("width");
gaussian_kernel_width->build_values(5.0, 8.0, R_EXP, 1.0, 2.0);
param_gaussian_kernel->append_child(gaussian_kernel_width);
root->append_child(param_gaussian_kernel);
CPolyKernel* poly_kernel=new CPolyKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
poly_kernel->print_modsel_params();
CModelSelectionParameters* param_poly_kernel=
new CModelSelectionParameters("kernel", poly_kernel);
root->append_child(param_poly_kernel);
CModelSelectionParameters* param_poly_kernel_degree=
new CModelSelectionParameters("degree");
param_poly_kernel_degree->build_values(2, 3, R_LINEAR);
param_poly_kernel->append_child(param_poly_kernel_degree);
return root;
}
void test_cross_validation()
{
/* data matrix dimensions */
index_t num_vectors=30;
index_t num_features=1;
/* training label data */
SGVector<float64_t> lab(num_vectors);
/* fill data matrix and labels */
SGMatrix<float64_t> train_dat(num_features, num_vectors);
CMath::range_fill_vector(train_dat.matrix, num_vectors);
for (index_t i=0; i<num_vectors; ++i)
{
/* labels are linear plus noise */
lab.vector[i]=i+CMath::normal_random(0, 1.0);
}
/* training features */
CDenseFeatures<float64_t>* features=
new CDenseFeatures<float64_t>(train_dat);
SG_REF(features);
/* training labels */
CLabels* labels=new CLabels(lab);
/* kernel ridge regression, only set labels for now, rest does not matter */
CKernelRidgeRegression* krr=new CKernelRidgeRegression(0, NULL, labels);
/* evaluation criterion */
CMeanSquaredError* eval_crit=
new CMeanSquaredError();
/* splitting strategy */
index_t n_folds=5;
CCrossValidationSplitting* splitting=
new CCrossValidationSplitting(labels, n_folds);
/* cross validation instance, 10 runs, 95% confidence interval */
CCrossValidation* cross=new CCrossValidation(krr, features, labels,
splitting, eval_crit);
cross->set_num_runs(3);
// cross->set_conf_int_alpha(0.05);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
krr->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
param_tree, cross);
/* print current combination */
bool print_state=true;
CParameterCombination* best_combination=grid_search->select_model(
print_state);
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(krr);
/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.01);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("result: ");
result->print_result();
/* clean up */
SG_UNREF(features);
SG_UNREF(best_combination);
SG_UNREF(result);
SG_UNREF(grid_search);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test_cross_validation();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibLinear.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
root->append_child(c1);
c1->build_values(-2.0, 2.0, R_EXP);
CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
root->append_child(c2);
c2->build_values(-2.0, 2.0, R_EXP);
return root;
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
#ifdef HAVE_LAPACK
int32_t num_subsets=5;
int32_t num_vectors=11;
/* create some data */
SGMatrix<float64_t> matrix(2, num_vectors);
for (int32_t i=0; i<num_vectors*2; i++)
matrix.matrix[i]=i;
/* create num_feautres 2-dimensional vectors */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
/* create three labels */
CBinaryLabels* labels=new CBinaryLabels(num_vectors);
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);
/* create linear classifier (use -s 2 option to avoid warnings) */
CLibLinear* classifier=new CLibLinear(L2R_L2LOSS_SVC);
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
splitting_strategy, evaluation_criterium);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
classifier->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
/* set autolocking to false to get rid of warnings */
cross->set_autolock(false);
CParameterCombination* best_combination=grid_search->select_model();
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(classifier);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
result->print_result();
/* clean up */
SG_UNREF(result);
SG_UNREF(best_combination);
SG_UNREF(grid_search);
#endif // HAVE_LAPACK
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012-2014 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
using namespace shogun;
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
root->append_child(c1);
c1->build_values(-1.0, 1.0, R_EXP);
CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
root->append_child(c2);
c2->build_values(-1.0, 1.0, R_EXP);
CCombinedKernel* kernel1=new CCombinedKernel();
kernel1->append_kernel(new CGaussianKernel(10, 2));
kernel1->append_kernel(new CGaussianKernel(10, 3));
kernel1->append_kernel(new CGaussianKernel(10, 4));
CModelSelectionParameters* param_kernel1=
new CModelSelectionParameters("kernel", kernel1);
root->append_child(param_kernel1);
CCombinedKernel* kernel2=new CCombinedKernel();
kernel2->append_kernel(new CGaussianKernel(10, 20));
kernel2->append_kernel(new CGaussianKernel(10, 30));
kernel2->append_kernel(new CGaussianKernel(10, 40));
CModelSelectionParameters* param_kernel2=
new CModelSelectionParameters("kernel", kernel2);
root->append_child(param_kernel2);
return root;
}
void test()
{
int32_t num_subsets=3;
int32_t num_vectors=20;
int32_t dim_vectors=3;
/* create some data and labels */
SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
matrix.matrix[i]=CMath::randn_double();
/* create feature object */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
/* create combined features */
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
SG_REF(comb_features);
/* create labels, two classes */
CBinaryLabels* labels=new CBinaryLabels(num_vectors);
SG_REF(labels);
for (index_t i=0; i<num_vectors; ++i)
labels->set_label(i, i%2==0 ? +1 : -1);
/* works */
// /* create svm */
// CMKLClassification* classifier=new CMKLClassification(new CLibSVM());
// classifier->set_interleaved_optimization_enabled(false);
/* create svm */
CMKLClassification* classifier=new CMKLClassification();
// both fail:
//classifier->set_interleaved_optimization_enabled(false);
classifier->set_interleaved_optimization_enabled(true);
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterion=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, comb_features,
labels, splitting_strategy, evaluation_criterion);
cross->set_num_runs(1);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
classifier->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
// This unfortunately currently creates a NULL pointer read SEGFAULT :(
// reported on github: MKL Multiclass null pointer read
//bool print_state=true;
/*CParameterCombination* best_combination=grid_search->select_model(
print_state);
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(classifier);*/
/* larger number of runs to have tighter confidence intervals */
/*cross->set_num_runs(10);
cross->set_conf_int_alpha(0.01);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
SG_SPRINT("result: %f", result->mean);*/
/* clean up */
SG_UNREF(comb_features);
SG_UNREF(labels);
//SG_UNREF(best_combination);
SG_UNREF(grid_search);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_INFO);
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/modelselection/ModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/MulticlassLibSVM.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
CModelSelectionParameters* build_param_tree(CKernel* kernel)
{
CModelSelectionParameters * root=new CModelSelectionParameters();
CModelSelectionParameters * c=new CModelSelectionParameters("C");
root->append_child(c);
c->build_values(-1.0, 1.0, R_EXP);
CModelSelectionParameters * params_kernel=new CModelSelectionParameters("kernel", kernel);
root->append_child(params_kernel);
CModelSelectionParameters * params_kernel_width=new CModelSelectionParameters("log_width");
params_kernel_width->build_values(-CMath::log(2.0), 0.0, R_LINEAR);
params_kernel->append_child(params_kernel_width);
return root;
}
void test()
{
/* number of classes is dimension of data here to have some easy multiclass
* structure */
const unsigned int num_vectors=50;
const unsigned int dim_vectors=3;
// Heiko: increase number of classes and things will fail :(
// Sergey: the special buggy case of 3 classes was hopefully fixed
float64_t distance=5;
/* create data: some easy multiclass data */
SGMatrix<float64_t> feat=SGMatrix<float64_t>(dim_vectors, num_vectors);
SGVector<float64_t> lab(num_vectors);
for (index_t j=0; j<feat.num_cols; ++j)
{
lab[j]=j%dim_vectors;
for (index_t i=0; i<feat.num_rows; ++i)
feat(i, j)=CMath::randn_double();
/* make sure classes are (alomst) linearly seperable against each other */
feat(lab[j],j)+=distance;
}
/* shogun representation of above data */
CDenseFeatures<float64_t> * cfeatures=new CDenseFeatures<float64_t>(feat);
CMulticlassLabels * clabels=new CMulticlassLabels(lab);
float64_t sigma=2;
CGaussianKernel* kernel=new CGaussianKernel(10, sigma);
const float C=10.;
CMulticlassLibSVM* cmachine=new CMulticlassLibSVM(C, kernel, clabels);
CMulticlassAccuracy * eval_crit=new CMulticlassAccuracy();
/* k-fold stratified x-validation */
index_t k=3;
CStratifiedCrossValidationSplitting * splitting=
new CStratifiedCrossValidationSplitting(clabels, k);
CCrossValidation * cross=new CCrossValidation(cmachine, cfeatures, clabels,
splitting, eval_crit);
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.05);
/* create peramters for model selection */
CModelSelectionParameters* root=build_param_tree(kernel);
CGridSearchModelSelection * model_selection=new CGridSearchModelSelection(
cross, root);
bool print_state=true;
CParameterCombination * params=model_selection->select_model(print_state);
SG_SPRINT("best combination\n");
params->print_tree();
/* clean up memory */
SG_UNREF(model_selection);
SG_UNREF(params);
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/lib/SGStringList.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* create_param_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
root->append_child(c1);
c1->build_values(1.0, 2.0, R_EXP);
CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
root->append_child(c2);
c2->build_values(1.0, 2.0, R_EXP);
CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
ds_kernel->print_modsel_params();
CModelSelectionParameters* param_ds_kernel=
new CModelSelectionParameters("kernel", ds_kernel);
root->append_child(param_ds_kernel);
CModelSelectionParameters* ds_kernel_delta=
new CModelSelectionParameters("delta");
ds_kernel_delta->build_values(1, 2, R_LINEAR);
param_ds_kernel->append_child(ds_kernel_delta);
CModelSelectionParameters* ds_kernel_theta=
new CModelSelectionParameters("theta");
ds_kernel_theta->build_values(1, 2, R_LINEAR);
param_ds_kernel->append_child(ds_kernel_theta);
return root;
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
index_t num_strings=10;
index_t max_string_length=20;
index_t min_string_length=max_string_length/2;
index_t num_subsets=num_strings/3;
SGStringList<char> strings(num_strings, max_string_length);
for (index_t i=0; i<num_strings; ++i)
{
index_t len=CMath::random(min_string_length, max_string_length);
SGString<char> current(len);
SG_SPRINT("string %i: \"", i);
/* fill with random uppercase letters (ASCII) */
for (index_t j=0; j<len; ++j)
{
current.string[j]=(char)CMath::random('A', 'Z');
char* string=new char[2];
string[0]=current.string[j];
string[1]='\0';
SG_SPRINT("%s", string);
delete[] string;
}
SG_SPRINT("\"\n");
strings.strings[i]=current;
}
/* create num_feautres 2-dimensional vectors */
CStringFeatures<char>* features=new CStringFeatures<char>(strings, ALPHANUM);
/* create labels, two classes */
CBinaryLabels* labels=new CBinaryLabels(num_strings);
for (index_t i=0; i<num_strings; ++i)
labels->set_label(i, i%2==0 ? 1 : -1);
/* create svm classifier */
CLibSVM* classifier=new CLibSVM();
/* splitting strategy */
CStratifiedCrossValidationSplitting* splitting_strategy=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* accuracy evaluation */
CContingencyTableEvaluation* evaluation_criterium=
new CContingencyTableEvaluation(ACCURACY);
/* cross validation class for evaluation in model selection */
CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
splitting_strategy, evaluation_criterium);
cross->set_num_runs(2);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
classifier->print_modsel_params();
/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
CModelSelectionParameters* param_tree=create_param_tree();
param_tree->print_tree();
/* handles all of the above structures in memory */
CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
cross, param_tree);
bool print_state=true;
CParameterCombination* best_combination=grid_search->select_model(
print_state);
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(classifier);
/* larger number of runs to have tighter confidence intervals */
cross->set_num_runs(10);
// cross->set_conf_int_alpha(0.01);
classifier->data_lock(labels, features);
CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
if (result->get_result_type() != CROSSVALIDATION_RESULT)
SG_SERROR("Evaluation result is not of type CCrossValidationResult!");
SG_SPRINT("result: ");
result->print_result();
/* clean up */
SG_UNREF(result);
SG_UNREF(best_combination);
SG_UNREF(grid_search);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
CModelSelectionParameters* build_complex_example_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c=new CModelSelectionParameters("C");
root->append_child(c);
c->build_values(1.0, 1.0, R_EXP);
CPowerKernel* power_kernel=new CPowerKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
power_kernel->print_modsel_params();
CModelSelectionParameters* param_power_kernel=
new CModelSelectionParameters("kernel", power_kernel);
root->append_child(param_power_kernel);
CModelSelectionParameters* param_power_kernel_degree=
new CModelSelectionParameters("degree");
param_power_kernel_degree->build_values(1.0, 1.0, R_EXP);
param_power_kernel->append_child(param_power_kernel_degree);
CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);
CModelSelectionParameters* param_power_kernel_metric1=
new CModelSelectionParameters("distance", m_metric);
param_power_kernel->append_child(param_power_kernel_metric1);
CModelSelectionParameters* param_power_kernel_metric1_k=
new CModelSelectionParameters("k");
param_power_kernel_metric1_k->build_values(1.0, 12.0, R_LINEAR);
param_power_kernel_metric1->append_child(param_power_kernel_metric1_k);
CGaussianKernel* gaussian_kernel=new CGaussianKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
gaussian_kernel->print_modsel_params();
CModelSelectionParameters* param_gaussian_kernel=
new CModelSelectionParameters("kernel", gaussian_kernel);
root->append_child(param_gaussian_kernel);
CModelSelectionParameters* param_gaussian_kernel_width=
new CModelSelectionParameters("log_width");
param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR);
param_gaussian_kernel->append_child(param_gaussian_kernel_width);
CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
ds_kernel->print_modsel_params();
CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters("kernel",
ds_kernel);
root->append_child(param_ds_kernel);
CModelSelectionParameters* param_ds_kernel_delta=
new CModelSelectionParameters("delta");
param_ds_kernel_delta->build_values(1.0, 2.0, R_EXP);
param_ds_kernel->append_child(param_ds_kernel_delta);
CModelSelectionParameters* param_ds_kernel_theta=
new CModelSelectionParameters("theta");
param_ds_kernel_theta->build_values(1.0, 2.0, R_EXP);
param_ds_kernel->append_child(param_ds_kernel_theta);
return root;
}
CModelSelectionParameters* build_sgobject_no_childs_tree()
{
CPowerKernel* power_kernel=new CPowerKernel();
CModelSelectionParameters* param_power_kernel=
new CModelSelectionParameters("kernel", power_kernel);
return param_power_kernel;
}
CModelSelectionParameters* build_leaf_node_tree()
{
CModelSelectionParameters* c_1=new CModelSelectionParameters("C1");
c_1->build_values(1.0, 1.0, R_EXP);
return c_1;
}
CModelSelectionParameters* build_root_no_childs_tree()
{
return new CModelSelectionParameters();
}
CModelSelectionParameters* build_root_value_childs_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c_1=new CModelSelectionParameters("C1");
root->append_child(c_1);
c_1->build_values(1.0, 1.0, R_EXP);
CModelSelectionParameters* c_2=new CModelSelectionParameters("C2");
root->append_child(c_2);
c_2->build_values(1.0, 1.0, R_EXP);
return root;
}
CModelSelectionParameters* build_root_sg_object_child_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CPowerKernel* power_kernel=new CPowerKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
power_kernel->print_modsel_params();
CModelSelectionParameters* param_power_kernel=
new CModelSelectionParameters("kernel", power_kernel);
root->append_child(param_power_kernel);
return root;
}
CModelSelectionParameters* build_root_sg_object_child_value_child_tree()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CPowerKernel* power_kernel=new CPowerKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
power_kernel->print_modsel_params();
CModelSelectionParameters* param_power_kernel=
new CModelSelectionParameters("kernel", power_kernel);
CModelSelectionParameters* c=new CModelSelectionParameters("C");
root->append_child(c);
c->build_values(1.0, 1.0, R_EXP);
root->append_child(param_power_kernel);
return root;
}
void test_get_combinations(CModelSelectionParameters* tree)
{
tree->print_tree();
/* build combinations of parameter trees */
CDynamicObjectArray* combinations=tree->get_combinations();
/* print and directly delete them all */
SG_SPRINT("----------------------------------\n");
for (index_t i=0; i<combinations->get_num_elements(); ++i)
{
CParameterCombination* combination=(CParameterCombination*)
combinations->get_element(i);
combination->print_tree();
SG_UNREF(combination);
}
SG_UNREF(combinations);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
CModelSelectionParameters* tree;
tree=build_root_no_childs_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_leaf_node_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_sgobject_no_childs_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_root_value_childs_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_root_sg_object_child_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_root_sg_object_child_value_child_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
tree=build_complex_example_tree();
SG_REF(tree);
test_get_combinations(tree);
SG_UNREF(tree);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/lib/DynamicObjectArray.h>
#include <shogun/lib/SGVector.h>
#include <stdlib.h>
using namespace std;
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void test_parameter_set_multiplication()
{
SG_SPRINT("\ntest_parameter_set_multiplication()\n");
DynArray<Parameter*> set1;
DynArray<Parameter*> set2;
SGVector<float64_t> param_vector(8);
SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen);
Parameter parameters[4];
parameters[0].add(¶m_vector.vector[0], "0");
parameters[0].add(¶m_vector.vector[1], "1");
set1.append_element(¶meters[0]);
parameters[1].add(¶m_vector.vector[2], "2");
parameters[1].add(¶m_vector.vector[3], "3");
set1.append_element(¶meters[1]);
parameters[2].add(¶m_vector.vector[4], "4");
parameters[2].add(¶m_vector.vector[5], "5");
set2.append_element(¶meters[2]);
parameters[3].add(¶m_vector.vector[6], "6");
parameters[3].add(¶m_vector.vector[7], "7");
set2.append_element(¶meters[3]);
DynArray<Parameter*>* result=new DynArray<Parameter*>();//CParameterCombination::parameter_set_multiplication(set1, set2);
for (index_t i=0; i<result->get_num_elements(); ++i)
{
Parameter* p=result->get_element(i);
for (index_t j=0; j<p->get_num_parameters(); ++j)
SG_SPRINT("%s ", p->get_parameter(j)->m_name);
SG_SPRINT("\n");
delete p;
}
delete result;
}
void test_leaf_sets_multiplication()
{
SG_SPRINT("\ntest_leaf_sets_multiplication()\n");
SGVector<float64_t> param_vector(6);
SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen);
CDynamicObjectArray sets;
CParameterCombination* new_root=new CParameterCombination();
SG_REF(new_root);
CDynamicObjectArray* current=new CDynamicObjectArray();
sets.append_element(current);
Parameter* p=new Parameter();
p->add(¶m_vector.vector[0], "0");
CParameterCombination* pc=new CParameterCombination(p);
current->append_element(pc);
p=new Parameter();
p->add(¶m_vector.vector[1], "1");
pc=new CParameterCombination(p);
current->append_element(pc);
/* first case: one element */
CDynamicObjectArray* result_simple=
CParameterCombination::leaf_sets_multiplication(sets, new_root);
SG_SPRINT("one set\n");
for (index_t i=0; i<result_simple->get_num_elements(); ++i)
{
CParameterCombination* tpc=(CParameterCombination*)
result_simple->get_element(i);
tpc->print_tree();
SG_UNREF(tpc);
}
SG_UNREF(result_simple);
/* now more elements are created */
current=new CDynamicObjectArray();
sets.append_element(current);
p=new Parameter();
p->add(¶m_vector.vector[2], "2");
pc=new CParameterCombination(p);
current->append_element(pc);
p=new Parameter();
p->add(¶m_vector.vector[3], "3");
pc=new CParameterCombination(p);
current->append_element(pc);
current=new CDynamicObjectArray();
sets.append_element(current);
p=new Parameter();
p->add(¶m_vector.vector[4], "4");
pc=new CParameterCombination(p);
current->append_element(pc);
p=new Parameter();
p->add(¶m_vector.vector[5], "5");
pc=new CParameterCombination(p);
current->append_element(pc);
/* second case: more element */
CDynamicObjectArray* result_complex=
CParameterCombination::leaf_sets_multiplication(sets, new_root);
SG_SPRINT("more sets\n");
for (index_t i=0; i<result_complex->get_num_elements(); ++i)
{
CParameterCombination* tpc=(CParameterCombination*)
result_complex->get_element(i);
tpc->print_tree();
SG_UNREF(tpc);
}
SG_UNREF(result_complex);
SG_UNREF(new_root);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test_parameter_set_multiplication();
test_leaf_sets_multiplication();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011-2012 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
using namespace shogun;
void test_tree(CModelSelectionParameters* tree)
{
SG_SPRINT("\n\ntree to process:\n");
tree->print_tree();
/* build combinations of parameter trees */
CDynamicObjectArray* combinations=tree->get_combinations();
/* print and directly delete them all */
SG_SPRINT("----------------------------------\n");
for (index_t i=0; i<combinations->get_num_elements(); ++i)
{
CParameterCombination* combination=
(CParameterCombination*)combinations->get_element(i);
combination->print_tree();
SG_UNREF(combination);
}
SG_UNREF(combinations);
}
CModelSelectionParameters* create_param_tree_1()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c=new CModelSelectionParameters("C");
root->append_child(c);
c->build_values(1, 2, R_EXP);
CPowerKernel* power_kernel=new CPowerKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
power_kernel->print_modsel_params();
CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
"kernel", power_kernel);
root->append_child(param_power_kernel);
CModelSelectionParameters* param_power_kernel_degree=
new CModelSelectionParameters("degree");
param_power_kernel_degree->build_values(1, 2, R_EXP);
param_power_kernel->append_child(param_power_kernel_degree);
CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
m_metric->print_modsel_params();
CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param=
new CModelSelectionParameters("distance", m_metric);
param_power_kernel->append_child(
param_power_kernel_metrikernel_width_sigma_param);
CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param_k=
new CModelSelectionParameters("k");
param_power_kernel_metrikernel_width_sigma_param_k->build_values(1, 2,
R_LINEAR);
param_power_kernel_metrikernel_width_sigma_param->append_child(
param_power_kernel_metrikernel_width_sigma_param_k);
CGaussianKernel* gaussian_kernel=new CGaussianKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
gaussian_kernel->print_modsel_params();
CModelSelectionParameters* param_gaussian_kernel=
new CModelSelectionParameters("kernel", gaussian_kernel);
root->append_child(param_gaussian_kernel);
CModelSelectionParameters* param_gaussian_kernel_width=
new CModelSelectionParameters("log_width");
param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2), R_LINEAR);
param_gaussian_kernel->append_child(param_gaussian_kernel_width);
CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
/* print all parameter available for modelselection
* Dont worry if yours is not included, simply write to the mailing list */
ds_kernel->print_modsel_params();
CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
"kernel", ds_kernel);
root->append_child(param_ds_kernel);
CModelSelectionParameters* param_ds_kernel_delta=
new CModelSelectionParameters("delta");
param_ds_kernel_delta->build_values(1, 2, R_EXP);
param_ds_kernel->append_child(param_ds_kernel_delta);
CModelSelectionParameters* param_ds_kernel_theta=
new CModelSelectionParameters("theta");
param_ds_kernel_theta->build_values(1, 2, R_EXP);
param_ds_kernel->append_child(param_ds_kernel_theta);
return root;
}
CModelSelectionParameters* create_param_tree_2()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CPowerKernel* power_kernel=new CPowerKernel();
CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
"kernel", power_kernel);
root->append_child(param_power_kernel);
CMinkowskiMetric* metric=new CMinkowskiMetric();
CModelSelectionParameters* param_power_kernel_metric=
new CModelSelectionParameters("distance", metric);
param_power_kernel->append_child(param_power_kernel_metric);
CModelSelectionParameters* param_metric_k=new CModelSelectionParameters(
"k");
param_metric_k->build_values(2, 3, R_LINEAR);
param_power_kernel_metric->append_child(param_metric_k);
CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
"kernel", ds_kernel);
root->append_child(param_ds_kernel);
return root;
}
CModelSelectionParameters* create_param_tree_3()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CPowerKernel* power_kernel=new CPowerKernel();
CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
"kernel", power_kernel);
root->append_child(param_power_kernel);
CMinkowskiMetric* metric=new CMinkowskiMetric();
CModelSelectionParameters* param_power_kernel_metric=
new CModelSelectionParameters("distance", metric);
param_power_kernel->append_child(param_power_kernel_metric);
CEuclideanDistance* euclidean=new CEuclideanDistance();
CModelSelectionParameters* param_power_kernel_distance=
new CModelSelectionParameters("distance", euclidean);
param_power_kernel->append_child(param_power_kernel_distance);
CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
"kernel", ds_kernel);
root->append_child(param_ds_kernel);
return root;
}
CModelSelectionParameters* create_param_tree_4a()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
CRegressionLabels* labels=new CRegressionLabels();
CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
CPowerKernel* power_kernel=new CPowerKernel();
CZeroMean* mean=new CZeroMean();
CGaussianLikelihood* lik=new CGaussianLikelihood();
CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features,
mean, labels, lik);
CLibSVM* svm=new CLibSVM();
CPowerKernel* power_kernel_svm=new CPowerKernel();
CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2);
CModelSelectionParameters* param_inf=new CModelSelectionParameters(
"inference_method", inf);
root->append_child(param_inf);
CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
"likelihood_model", lik);
param_inf->append_child(param_inf_gaussian);
CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
"kernel", gaussian_kernel);
param_inf->append_child(param_inf_kernel_1);
CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
"kernel", power_kernel);
param_inf->append_child(param_inf_kernel_2);
CModelSelectionParameters* param_svm=new CModelSelectionParameters(
"SVM", svm);
root->append_child(param_svm);
CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters(
"kernel", power_kernel_svm);
param_svm->append_child(param_svm_kernel_1);
CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters(
"kernel", gaussian_kernel_svm);
param_svm->append_child(param_svm_kernel_2);
return root;
}
CModelSelectionParameters* create_param_tree_4b()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
CRegressionLabels* labels=new CRegressionLabels();
CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
CPowerKernel* power_kernel=new CPowerKernel();
CZeroMean* mean=new CZeroMean();
CGaussianLikelihood* lik=new CGaussianLikelihood();
CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features,
mean, labels, lik);
CLibSVM* svm=new CLibSVM();
CPowerKernel* power_kernel_svm=new CPowerKernel();
CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2);
CModelSelectionParameters* param_c=new CModelSelectionParameters("C1");
root->append_child(param_c);
param_c->build_values(1,2,R_EXP);
CModelSelectionParameters* param_inf=new CModelSelectionParameters(
"inference_method", inf);
root->append_child(param_inf);
CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
"likelihood_model", lik);
param_inf->append_child(param_inf_gaussian);
CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
"kernel", gaussian_kernel);
param_inf->append_child(param_inf_kernel_1);
CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
"kernel", power_kernel);
param_inf->append_child(param_inf_kernel_2);
CModelSelectionParameters* param_svm=new CModelSelectionParameters(
"SVM", svm);
root->append_child(param_svm);
CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters(
"kernel", power_kernel_svm);
param_svm->append_child(param_svm_kernel_1);
CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters(
"kernel", gaussian_kernel_svm);
param_svm->append_child(param_svm_kernel_2);
return root;
}
CModelSelectionParameters* create_param_tree_5()
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
CRegressionLabels* labels=new CRegressionLabels();
CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
CLinearKernel* linear_kernel=new CLinearKernel();
CPowerKernel* power_kernel=new CPowerKernel();
CZeroMean* mean=new CZeroMean();
CGaussianLikelihood* lik=new CGaussianLikelihood();
CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features,
mean, labels, lik);
CModelSelectionParameters* param_inf=new CModelSelectionParameters(
"inference_method", inf);
root->append_child(param_inf);
CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
"likelihood_model", lik);
param_inf->append_child(param_inf_gaussian);
CModelSelectionParameters* param_inf_gaussian_sigma=
new CModelSelectionParameters("log_sigma");
param_inf_gaussian->append_child(param_inf_gaussian_sigma);
param_inf_gaussian_sigma->build_values(2.0*CMath::log(2.0), 3.0*CMath::log(2.0), R_LINEAR);
CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
"kernel", gaussian_kernel);
param_inf->append_child(param_inf_kernel_1);
CModelSelectionParameters* param_inf_kernel_width=
new CModelSelectionParameters("log_width");
param_inf_kernel_1->append_child(param_inf_kernel_width);
param_inf_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR);
CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
"kernel", linear_kernel);
param_inf->append_child(param_inf_kernel_2);
CModelSelectionParameters* param_inf_kernel_3=new CModelSelectionParameters(
"kernel", power_kernel);
param_inf->append_child(param_inf_kernel_3);
return root;
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
CModelSelectionParameters* tree=NULL;
tree=create_param_tree_1();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
tree=create_param_tree_2();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
tree=create_param_tree_3();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
tree=create_param_tree_4a();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
tree=create_param_tree_4b();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
tree=create_param_tree_5();
SG_REF(tree);
test_tree(tree);
SG_UNREF(tree);
exit_shogun();
return 0;
}
/*
* Copyright (c) 2014, Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2014 Khaled Nasr
*/
#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DataGenerator.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralLayers.h>
using namespace shogun;
int main(int, char*[])
{
init_shogun_with_defaults();
#ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian()
// initialize the random number generator with a fixed seed, for repeatability
CMath::init_random(10);
// Prepare the training data
const int num_classes = 4;
const int num_features = 10;
const int num_examples_per_class = 20;
SGMatrix<float64_t> X;
SGVector<float64_t> Y;
try
{
X = CDataGenerator::generate_gaussians(
num_examples_per_class,num_classes,num_features);
Y = SGVector<float64_t>(num_classes*num_examples_per_class);
}
catch (ShogunException e)
{
// out of memory
SG_SPRINT(e.get_exception_string());
return 0;
}
for (int32_t i = 0; i < num_classes; i++)
for (int32_t j = 0; j < num_examples_per_class; j++)
Y[i*num_examples_per_class + j] = i;
CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
CMulticlassLabels* labels = new CMulticlassLabels(Y);
// Create a small network single hidden layer network
CNeuralLayers* layers = new CNeuralLayers();
layers->input(num_features)->rectified_linear(10)->softmax(num_classes);
CNeuralNetwork* network = new CNeuralNetwork(layers->done());
// initialize the network
network->quick_connect();
network->initialize_neural_network();
// uncomment this line to enable info logging
// network->io->set_loglevel(MSG_INFO);
// train using default parameters
network->set_labels(labels);
network->train(features);
// evaluate
CMulticlassLabels* predictions = network->apply_multiclass(features);
CMulticlassAccuracy* evaluator = new CMulticlassAccuracy();
float64_t accuracy = evaluator->evaluate(predictions, labels);
SG_SINFO("Accuracy = %f %\n", accuracy*100);
// Clean up
SG_UNREF(network);
SG_UNREF(layers);
SG_UNREF(features);
SG_UNREF(predictions);
SG_UNREF(evaluator);
#endif
exit_shogun();
return 0;
}
/*
* Copyright (c) 2014, Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2014 Khaled Nasr
*/
#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DataGenerator.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/lib/DynamicObjectArray.h>
#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralInputLayer.h>
#include <shogun/neuralnets/NeuralSoftmaxLayer.h>
#include <shogun/neuralnets/NeuralConvolutionalLayer.h>
using namespace shogun;
int main(int, char*[])
{
init_shogun_with_defaults();
#ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian()
// initialize the random number generator with a fixed seed, for repeatability
CMath::init_random(10);
// Prepare the training data
const int width = 4;
const int height = 4;
const int num_channels = 2;
const int num_features = width*height*num_channels;
const int num_classes = 3;
const int num_examples_per_class = 15;
SGMatrix<float64_t> X;
SGVector<float64_t> Y;
try
{
X = CDataGenerator::generate_gaussians(
num_examples_per_class,num_classes,num_features);
Y = SGVector<float64_t>(num_classes*num_examples_per_class);
}
catch (ShogunException e)
{
// out of memory
SG_SPRINT(e.get_exception_string());
return 0;
}
for (int32_t i = 0; i < num_classes; i++)
for (int32_t j = 0; j < num_examples_per_class; j++)
Y[i*num_examples_per_class + j] = i;
CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
CMulticlassLabels* labels = new CMulticlassLabels(Y);
// prepare the layers
CDynamicObjectArray* layers = new CDynamicObjectArray();
// input layer
layers->append_element(new CNeuralInputLayer(width,height,num_channels));
// first convolutional layer: 3 feature maps, 3x3 masks, 2x2 max-pooling
layers->append_element(new CNeuralConvolutionalLayer(
CMAF_RECTIFIED_LINEAR, 3, 1,1, 2,2));
// second convolutional layer: 5 feature maps, 3x3 masks
layers->append_element(new CNeuralConvolutionalLayer(
CMAF_RECTIFIED_LINEAR, 5, 1,1));
// output layer
layers->append_element(new CNeuralSoftmaxLayer(num_classes));
// create and initialize the network
CNeuralNetwork* network = new CNeuralNetwork(layers);
network->quick_connect();
network->initialize_neural_network(0.1);
// uncomment this line to enable info logging
// network->io->set_loglevel(MSG_INFO);
// train using default parameters
network->set_labels(labels);
network->train(features);
// evaluate
CMulticlassLabels* predictions = network->apply_multiclass(features);
CMulticlassAccuracy* evaluator = new CMulticlassAccuracy();
float64_t accuracy = evaluator->evaluate(predictions, labels);
SG_SINFO("Accuracy = %f %\n", accuracy*100);
// Clean up
SG_UNREF(network);
SG_UNREF(features);
SG_UNREF(predictions);
SG_UNREF(evaluator);
#endif
exit_shogun();
return 0;
}
/*
* Copyright (c) 2014, Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2014 Khaled Nasr
*/
#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DataGenerator.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/neuralnets/DeepAutoencoder.h>
#include <shogun/neuralnets/NeuralLayers.h>
using namespace shogun;
int main(int, char*[])
{
init_shogun_with_defaults();
#ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian()
// initialize the random number generator with a fixed seed, for repeatability
CMath::init_random(10);
// Prepare the training data
const int num_features = 20;
const int num_classes = 4;
const int num_examples_per_class = 20;
SGMatrix<float64_t> X;
try
{
X = CDataGenerator::generate_gaussians(
num_examples_per_class,num_classes,num_features);
}
catch (ShogunException e)
{
// out of memory
SG_SPRINT(e.get_exception_string());
return 0;
}
CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
// Create a deep autoencoder
CNeuralLayers* layers = new CNeuralLayers();
layers
->input(num_features)
->rectified_linear(10)->rectified_linear(5)->rectified_linear(10)
->linear(num_features);
CDeepAutoencoder* ae = new CDeepAutoencoder(layers->done());
// uncomment this line to enable info logging
// ae->io->set_loglevel(MSG_INFO);
// pre-train
ae->pt_epsilon.set_const(1e-6);
ae->pre_train(features);
// fine-tune
ae->train(features);
// reconstruct the data
CDenseFeatures<float64_t>* reconstructions = ae->reconstruct(features);
SGMatrix<float64_t> X_reconstructed = reconstructions->get_feature_matrix();
// find the average difference between the data and the reconstructions
float64_t avg_diff = 0;
int32_t N = X.num_rows*X.num_cols;
for (int32_t i=0; i<N; i++)
avg_diff += CMath::abs(X[i]-X_reconstructed[i])/CMath::abs(X[i]);
avg_diff /= N;
SG_SINFO("Average difference = %f %\n", avg_diff*100);
// Clean up
SG_UNREF(ae);
SG_UNREF(layers);
SG_UNREF(features);
SG_UNREF(reconstructions);
#endif
exit_shogun();
return 0;
}
/*
* Copyright (c) 2014, Shogun Toolbox Foundation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* Written (W) 2014 Khaled Nasr
*/
#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/neuralnets/DeepBeliefNetwork.h>
using namespace shogun;
int main(int, char*[])
{
init_shogun_with_defaults();
// initialize the random number generator with a fixed seed, for repeatability
CMath::init_random(10);
// Prepare the training data
const int num_features = 5;
const int num_examples= 50;
SGVector<float64_t> means;
SGMatrix<float64_t> X;
try
{
means = SGVector<float64_t>(num_features);
X = SGMatrix<float64_t>(num_features, num_examples);
}
catch (ShogunException e)
{
// out of memory
SG_SPRINT(e.get_exception_string());
return 0;
}
for (int32_t i=0; i<num_features; i++)
means[i] = CMath::random(-1.0,1.0);
for (int32_t i=0; i<num_features; i++)
for (int32_t j=0; j<num_examples; j++)
X(i,j) = CMath::normal_random(means[i], 1.0);
CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
// Create a DBN
CDeepBeliefNetwork* dbn = new CDeepBeliefNetwork(num_features, RBMVUT_GAUSSIAN);
dbn->add_hidden_layer(10);
dbn->add_hidden_layer(10);
dbn->add_hidden_layer(20);
dbn->initialize_neural_network();
// uncomment this line to enable info logging
// dbn->io->set_loglevel(MSG_INFO);
// pre-train
dbn->pt_max_num_epochs.set_const(100);
dbn->pt_cd_num_steps.set_const(10);
dbn->pt_gd_learning_rate.set_const(0.01);
dbn->pre_train(features);
// fine-tune
dbn->max_num_epochs = 100;
dbn->cd_num_steps = 10;
dbn->gd_learning_rate = 0.01;
dbn->train(features);
// draw 1000 samples from the DBN
CDenseFeatures<float64_t>* samples = dbn->sample(100,1000);
SGMatrix<float64_t> samples_matrix = samples->get_feature_matrix();
// compute the sample means
SGVector<float64_t> samples_means = CStatistics::matrix_mean(samples_matrix, false);
// compute the average difference between the sample means and the true means
float64_t avg_diff = 0;
for (int32_t i=0; i<num_features; i++)
avg_diff += CMath::abs(means[i]-samples_means[i]);
avg_diff /= num_features;
SG_SINFO("Average difference = %f\n", avg_diff);
// Clean up
SG_UNREF(dbn);
SG_UNREF(features);
SG_UNREF(samples);
exit_shogun();
return 0;
}
#include <cstdio>
#include <shogun/optimization/lbfgs/lbfgs.h>
static lbfgsfloatval_t evaluate(
void *instance,
const lbfgsfloatval_t *x,
lbfgsfloatval_t *g,
const int n,
const lbfgsfloatval_t step
)
{
int i;
lbfgsfloatval_t fx = 0.0;
for (i = 0;i < n;i += 2) {
lbfgsfloatval_t t1 = 1.0 - x[i];
lbfgsfloatval_t t2 = 10.0 * (x[i+1] - x[i] * x[i]);
g[i+1] = 20.0 * t2;
g[i] = -2.0 * (x[i] * g[i+1] + t1);
fx += t1 * t1 + t2 * t2;
}
return fx;
}
static int progress(
void *instance,
const lbfgsfloatval_t *x,
const lbfgsfloatval_t *g,
const lbfgsfloatval_t fx,
const lbfgsfloatval_t xnorm,
const lbfgsfloatval_t gnorm,
const lbfgsfloatval_t step,
int n,
int k,
int ls
)
{
printf("Iteration %d:\n", k);
printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]);
printf(" xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step);
printf("\n");
return 0;
}
#define N 100
int main(int argc, char *argv[])
{
int i, ret = 0;
lbfgsfloatval_t fx;
lbfgsfloatval_t *x = lbfgs_malloc(N);
lbfgs_parameter_t param;
if (x == NULL) {
printf("ERROR: Failed to allocate a memory block for variables.\n");
return 1;
}
/* Initialize the variables. */
for (i = 0;i < N;i += 2) {
x[i] = -1.2;
x[i+1] = 1.0;
}
/* Initialize the parameters for the L-BFGS optimization. */
lbfgs_parameter_init(¶m);
/*param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;*/
/*
Start the L-BFGS optimization; this will invoke the callback functions
evaluate() and progress() when necessary.
*/
ret = lbfgs(N, x, &fx, evaluate, progress, NULL, ¶m);
/* Report the result. */
printf("L-BFGS optimization terminated with status code = %d\n", ret);
printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]);
lbfgs_free(x);
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
/* number of features and their dimension */
const int32_t n=6;
init_shogun(&print_message);
/* create some random data */
SGMatrix<float64_t> matrix(n,n);
for(int32_t i=0; i<n*n; ++i)
matrix.matrix[i]=CMath::random((float64_t)-n,(float64_t)n);
SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n);
/* create n n-dimensional feature vectors */
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
/* create gaussian kernel with cache 10MB, width will be changed later */
CGaussianKernel* kernel = new CGaussianKernel(10, 2.0);
kernel->init(features, features);
/* create n labels (+1,-1,+1,-1,...) */
CBinaryLabels* labels=new CBinaryLabels(n);
for (int32_t i=0; i<n; ++i)
labels->set_label(i, i%2==0 ? +1 : -1);
/* create libsvm with C=10 and produced labels */
CLibSVM* svm=new CLibSVM(10, kernel, labels);
/* iterate over different width parameters */
for (int32_t k=0; k<10; ++k)
{
float64_t width=CMath::pow(2.0,k);
float64_t log_width=CMath::log(width/2.0)/2.0;
/* create parameter to change current kernel width */
Parameter* param=new Parameter();
param->add(&log_width, "log_width", "");
/* tell kernel to use the newly produced parameter */
kernel->m_parameters->set_from_parameters(param);
SG_SPRINT("\n\ncurrent kernel width: 2^%d=%f\n", k, kernel->get_width());
/* print kernel matrix */
for (int32_t i=0; i<n; i++)
{
for (int32_t j=0; j<n; j++)
SG_SPRINT("%f ", kernel->kernel(i,j));
SG_SPRINT("\n");
}
/* train and classify */
svm->train();
for (int32_t i=0; i<n; ++i)
SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i,
svm->apply_one(i), i, labels->get_label(i));
delete param;
}
/* free up memory */
SG_UNREF(svm);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
/* number of features and their dimension, number of kernels */
int main(int argc, char** argv)
{
const int32_t n=7;
init_shogun(&print_message);
/* create some random data and hand it to each kernel */
SGMatrix<float64_t> matrix(n,n);
for (int32_t k=0; k<n*n; ++k)
matrix.matrix[k]=CMath::random((float64_t) -n, (float64_t) n);
SG_SPRINT("feature data:\n");
SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n);
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
/* create n kernels with n features each */
CGaussianKernel** kernels=SG_MALLOC(CGaussianKernel*, n);
for (int32_t i=0; i<n; ++i)
{
kernels[i]=new CGaussianKernel(10, CMath::random(0.0, (float64_t)n*n));
/* hand data to kernel */
kernels[i]->init(features, features);
}
/* create n parameter instances, each with one kernel */
Parameter** parameters=SG_MALLOC(Parameter*, n);
for (int32_t i=0; i<n; ++i)
{
parameters[i]=new Parameter();
parameters[i]->add((CSGObject**)&kernels[i], "kernel", "");
}
/* create n labels (+1,-1,+1,-1,...) */
CBinaryLabels* labels=new CBinaryLabels(n);
for (int32_t i=0; i<n; ++i)
labels->set_label(i, i%2==0 ? +1 : -1);
/* create libsvm with C=10 and produced labels */
CLibSVM* svm=new CLibSVM(10, NULL, labels);
/* iterate over all parameter instances and set them as subkernel */
for (int32_t k=0; k<n; ++k)
{
SG_SPRINT("\nkernel %d has width %f\n", k, kernels[k]->get_width());
/* change kernel, old one is UNREF'ed, new one is REF'ed */
svm->m_parameters->set_from_parameters(parameters[k]);
/* train and classify with the different kernels */
svm->train();
for (int32_t i=0; i<n; ++i)
SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i,
svm->apply_one(i), i, labels->get_label(i));
}
/* free up memory: delete all Parameter instances */
for (int32_t i=0; i<n; ++i)
delete parameters[i];
/* delete created arrays */
SG_FREE(kernels);
SG_FREE(parameters);
/* this also handles features, labels, and last kernel in kernels[n-1] */
SG_UNREF(svm);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/lib/SGStringList.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
void print_modsel_parameters(CSGObject* object)
{
SGStringList<char> modsel_params=object->get_modelsel_names();
SG_SPRINT("Parameters of %s available for model selection:\n",
object->get_name());
char* type_string=SG_MALLOC(char, 100);
for (index_t i=0; i<modsel_params.num_strings; ++i)
{
/* extract current name, ddescription and type, and print them */
const char* name=modsel_params.strings[i].string;
index_t index=object->get_modsel_param_index(name);
TSGDataType type=object->m_model_selection_parameters->get_parameter(
index)->m_datatype;
type.to_string(type_string, 100);
SG_SPRINT("\"%s\": \"%s\", %s\n", name,
object->get_modsel_param_descr(name), type_string);
}
SG_FREE(type_string);
SG_SPRINT("\n");
}
int main(int argc, char** argv)
{
init_shogun(&print_message);
#ifndef HAVE_LAPACK
CSGObject* object;
object=new CLibSVM();
print_modsel_parameters(object);
SG_UNREF(object);
object=new CLibLinear();
print_modsel_parameters(object);
SG_UNREF(object);
object=new CDistantSegmentsKernel();
print_modsel_parameters(object);
SG_UNREF(object);
object=new CGaussianKernel();
print_modsel_parameters(object);
SG_UNREF(object);
object=new CPowerKernel();
print_modsel_parameters(object);
SG_UNREF(object);
object=new CMinkowskiMetric();
print_modsel_parameters(object);
SG_UNREF(object);
#endif // HAVE_LAPACK
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/Parameter.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>
using namespace shogun;
int32_t max=3;
const float64_t initial_value=1;
const float64_t another_value=2;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
bool test_float_scalar()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
float64_t original_parameter=initial_value;
original_parameter_list->add(&original_parameter, "param", "");
float64_t new_parameter=another_value;
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add(&new_parameter, "param", "");
original_parameter_list->set_from_parameters(new_parameter_list);
result&=original_parameter==another_value;
delete original_parameter_list;
delete new_parameter_list;
return result;
}
bool test_float_vector()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
float64_t* original_parameter=SG_MALLOC(float64_t, max);
SGVector<float64_t>::fill_vector(original_parameter, max, initial_value);
original_parameter_list->add_vector(&original_parameter, &max, "param", "");
float64_t* new_parameter=SG_MALLOC(float64_t, max);
SGVector<float64_t>::fill_vector(new_parameter, max, another_value);
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add_vector(&new_parameter, &max, "param", "");
original_parameter_list->set_from_parameters(new_parameter_list);
for (int32_t i=0; i<max; ++i)
result&=original_parameter[i]==another_value;
delete original_parameter;
delete new_parameter;
delete original_parameter_list;
delete new_parameter_list;
return result;
}
bool test_float_matrix()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
float64_t* original_parameter=SG_MALLOC(float64_t, max*max);
SGVector<float64_t>::fill_vector(original_parameter, max*max, initial_value);
original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", "");
float64_t* new_parameter=SG_MALLOC(float64_t, max*max);
SGVector<float64_t>::fill_vector(new_parameter, max*max, another_value);
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");
original_parameter_list->set_from_parameters(new_parameter_list);
for (int32_t i=0; i<max*max; ++i)
result&=original_parameter[i]==another_value;
delete original_parameter;
delete new_parameter;
delete original_parameter_list;
delete new_parameter_list;
return result;
}
bool test_sgobject_scalar()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
CSGObject* original_parameter=new CGaussianKernel(10, 10);
SG_REF(original_parameter);
original_parameter_list->add(&original_parameter, "kernel", "");
CSGObject* new_parameter=new CDistantSegmentsKernel(10, 10, 10);
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add(&new_parameter, "kernel", "");
/* note: old_parameter is SG_UNREF'ed, new one SG_REF'ed */
original_parameter_list->set_from_parameters(new_parameter_list);
result&=original_parameter==new_parameter;
/* old original kernel was deleted by shogun's SG_UNREF */
SG_UNREF(new_parameter);
delete original_parameter_list;
delete new_parameter_list;
return result;
}
bool test_sgobject_vector()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
CSGObject** original_parameter=SG_MALLOC(CSGObject*, max);
for (int32_t i=0; i<max; ++i)
{
original_parameter[i]=new CDistantSegmentsKernel(1, 1, 1);
SG_REF(original_parameter[i]);
}
original_parameter_list->add_vector(&original_parameter, &max, "param", "");
CSGObject** new_parameter=SG_MALLOC(CSGObject*, max);
for (int32_t i=0; i<max; ++i)
new_parameter[i]=new CDistantSegmentsKernel(2, 2, 2);
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add_vector(&new_parameter, &max, "param", "");
/* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
original_parameter_list->set_from_parameters(new_parameter_list);
for (int32_t i=0; i<max; ++i)
result&=original_parameter[i]==new_parameter[i];
/* old original kernels were deleted by shogun's SG_UNREF */
delete original_parameter;
for (int32_t i=0; i<max; ++i)
SG_UNREF(new_parameter[i]);
delete new_parameter;
delete original_parameter_list;
delete new_parameter_list;
return result;
}
bool test_sgobject_matrix()
{
bool result=true;
Parameter* original_parameter_list=new Parameter();
CSGObject** original_parameter=SG_MALLOC(CSGObject*, max*max);
for (int32_t i=0; i<max; ++i)
{
for (int32_t j=0; j<max; ++j)
{
original_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
SG_REF(original_parameter[j*max+i]);
}
}
original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", "");
CSGObject** new_parameter=SG_MALLOC(CSGObject*, max*max);
for (int32_t i=0; i<max; ++i)
{
for (int32_t j=0; j<max; ++j)
new_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
}
Parameter* new_parameter_list=new Parameter();
new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");
/* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
original_parameter_list->set_from_parameters(new_parameter_list);
for (int32_t i=0; i<max; ++i)
{
for (int32_t j=0; j<max; ++j)
result&=original_parameter[j*max+i]==new_parameter[j*max+i];
}
/* old original kernels were deleted by shogun's SG_UNREF */
delete original_parameter;
for (int32_t i=0; i<max*max; ++i)
SG_UNREF(new_parameter[i]);
delete new_parameter;
delete original_parameter_list;
delete new_parameter_list;
return result;
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
bool result=true;
/* test wheater set_from_parameters works for these types */
result&=test_float_scalar();
result&=test_sgobject_scalar();
result&=test_sgobject_vector();
result&=test_sgobject_matrix();
result&=test_float_matrix();
result&=test_float_vector();
if (result)
SG_SPRINT("SUCCESS!\n")
else
SG_SPRINT("FAILURE!\n")
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2014 Abhijeet Kislay
* Copyright (C) 2014 Abhijeet Kislay
*/
#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/preprocessor/FisherLDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/lib/common.h>
#include <shogun/features/DataGenerator.h>
using namespace shogun;
#define NUM 50
#define DIMS 2
#define CLASSES 2
void test()
{
SGVector<float64_t> lab(CLASSES*NUM);
SGMatrix<float64_t> feat(DIMS, CLASSES*NUM);
feat=CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS);
for(int i=0; i<CLASSES; ++i)
for(int j=0; j<NUM; ++j)
lab[i*NUM+j]=double(i);
// Create train labels
CMulticlassLabels* labels=new CMulticlassLabels(lab);
SG_REF(labels)
// Create train features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat);
SG_REF(features)
// Initiate the FisherLDA class
CFisherLDA* fisherlda=new CFisherLDA(AUTO_FLDA);
SG_REF(fisherlda)
fisherlda->fit(features, labels, 1);
SGMatrix<float64_t> y=fisherlda->apply_to_feature_matrix(features);
// display output
y.display_matrix();
SG_UNREF(fisherlda)
SG_UNREF(features)
SG_UNREF(labels)
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2008-2010 Soeren Sonnenburg, Alexander Binder
* Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society
* Copyright (C) 2010 Berlin Institute of Technology
*/
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/preproc/RandomFourierGaussPreproc.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/lib/Mathematics.h>
#include <shogun/lib/common.h>
#include <shogun/base/init.h>
#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <iostream>
#include <algorithm>
#include <ctime>
using namespace shogun;
void gen_rand_data(float64_t* & feat, float64_t* & lab,const int32_t num,const int32_t dims,const float64_t dist)
{
lab=SG_MALLOC(float64_t, num);
feat=SG_MALLOC(float64_t, num*dims);
for (int32_t i=0; i<num; i++)
{
if (i<num/2)
{
lab[i]=-1.0;
for (int32_t j=0; j<dims; j++)
feat[i*dims+j]=CMath::random(0.0,1.0)+dist;
}
else
{
lab[i]=1.0;
for (int32_t j=0; j<dims; j++)
feat[i*dims+j]=CMath::random(0.0,1.0)-dist;
}
}
CMath::display_vector(lab,num);
CMath::display_matrix(feat,dims, num);
}
int main()
{
time_t a,b;
int32_t dims=6000;
float64_t dist=0.5;
int32_t randomfourier_featurespace_dim=500; // the typical application of the below preprocessor are cases with high input dimensionalities of some thousands
int32_t numtr=3000;
int32_t numte=3000;
const int32_t feature_cache=0;
const int32_t kernel_cache=0;
// important trick for RFgauss to work: kernel width is set such that average inner kernel distance is close one
// the rfgauss approximation breaks down if average inner kernel distances (~~ kernel width to small compared to variance of data) are too large
// try rbf_width=0.1 to see how it fails! - you will see the problem in the large number of negative kernel entries (numnegratio) for the rfgauss linear kernel
const float64_t rbf_width=4000;
const float64_t svm_C=10;
const float64_t svm_eps=0.001;
init_shogun();
float64_t* feattr(NULL);
float64_t* labtr(NULL);
a=time(NULL);
std::cout << "generating train data"<<std::endl;
gen_rand_data(feattr,labtr,numtr,dims,dist);
float64_t* feattr2=SG_MALLOC(float64_t, numtr*dims);
std::copy(feattr,feattr+numtr*dims,feattr2);
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
float64_t* featte(NULL);
float64_t* labte(NULL);
a=time(NULL);
std::cout << "generating test data"<<std::endl;
gen_rand_data(featte,labte,numte,dims,dist);
float64_t* featte2=SG_MALLOC(float64_t, numtr*dims);
std::copy(featte,featte+numtr*dims,featte2);
float64_t* featte3=SG_MALLOC(float64_t, numtr*dims);
std::copy(featte,featte+numtr*dims,featte3);
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// create train labels
CLabels* labelstr=new CLabels();
labelstr->set_labels(labtr, numtr);
SG_REF(labelstr);
// create train features
a=time(NULL);
std::cout << "initializing shogun train feature"<<std::endl;
CDenseFeatures<float64_t>* featurestr1 = new CDenseFeatures<float64_t>(feature_cache);
SG_REF(featurestr1);
featurestr1->set_feature_matrix(feattr, dims, numtr);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// create gaussian kernel
// std::cout << "computing gaussian train kernel"<<std::endl;
CGaussianKernel* kerneltr1 = new CGaussianKernel(kernel_cache, rbf_width);
SG_REF(kerneltr1);
kerneltr1->init(featurestr1, featurestr1);
// create svm via libsvm and train
CLibSVM* svm1 = new CLibSVM(svm_C, kerneltr1, labelstr);
SG_REF(svm1);
svm1->set_epsilon(svm_eps);
a=time(NULL);
std::cout << "training SVM over gaussian kernel"<<std::endl;
svm1->train();
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
printf("num_sv:%d b:%f\n", svm1->get_num_support_vectors(), svm1->get_bias());
a=time(NULL);
std::cout << "initializing shogun test feature"<<std::endl;
CDenseFeatures<float64_t>* featureste1 = new CDenseFeatures<float64_t>(feature_cache);
SG_REF(featureste1);
featureste1->set_feature_matrix(featte, dims, numte);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
//std::cout << "computing gaussian test kernel"<<std::endl;
CGaussianKernel* kernelte1 = new CGaussianKernel(kernel_cache, rbf_width);
SG_REF(kernelte1);
kernelte1->init(featurestr1, featureste1);
svm1->set_kernel(kernelte1);
a=time(NULL);
std::cout << "scoring gaussian test kernel"<<std::endl;
std::vector<float64_t> scoreste1(numte);
float64_t err1=0;
for(int32_t i=0; i< numte ;++i)
{
scoreste1[i]=svm1->classify_example(i);
if(scoreste1[i]*labte[i]<0)
{
err1+=1.0/numte;
}
}
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// ***************************************
// now WITH the preprocessor
a=time(NULL);
std::cout << "initializing preprocessor"<<std::endl;
CRandomFourierGaussPreproc *rfgauss=new CRandomFourierGaussPreproc;
SG_REF(rfgauss);
rfgauss->get_io()->set_loglevel(MSG_DEBUG);
// ************************************************************
// set parameters of the preprocessor
// ******************************** !!!!!!!!!!!!!!!!! CMath::sqrt(rbf_width/2.0)
rfgauss->set_kernelwidth( CMath::sqrt(rbf_width/2.0) );
rfgauss->set_dim_input_space(dims);
rfgauss->set_dim_feature_space(randomfourier_featurespace_dim);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// create train features
a=time(NULL);
std::cout << "initializing shogun train feature again"<<std::endl;
CDenseFeatures<float64_t>* featurestr2 = new CDenseFeatures<float64_t>(feature_cache);
SG_REF(featurestr2);
featurestr2->set_feature_matrix(feattr2, dims, numtr);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// ************************************************************
// use preprocessor
// **************************************************************
// add preprocessor
featurestr2->add_preproc(rfgauss);
// apply preprocessor
a=time(NULL);
std::cout << "applying preprocessor to train feature"<<std::endl;
featurestr2->apply_preproc();
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// save random coefficients and state data of preprocessor for use with a new preprocessor object (see lines following "// now the same with a new preprocessor to show the usage of set_randomcoefficients"
// Alternative: use built-in serialization to load and save state data from/to a file!!!
float64_t *randomcoeff_additive2, * randomcoeff_multiplicative2;
int32_t dim_feature_space2,dim_input_space2;
float64_t kernelwidth2;
rfgauss->get_randomcoefficients(&randomcoeff_additive2,
&randomcoeff_multiplicative2,
&dim_feature_space2, &dim_input_space2, &kernelwidth2);
// create linear kernel
//std::cout << "computing linear train kernel over preprocessed features"<<std::endl;
CLinearKernel* kerneltr2 = new CLinearKernel();
SG_REF(kerneltr2);
kerneltr2->init(featurestr2, featurestr2);
// create svm via libsvm and train
CLibSVM* svm2 = new CLibSVM(svm_C, kerneltr2, labelstr);
SG_REF(svm2);
svm2->set_epsilon(svm_eps);
a=time(NULL);
std::cout << "training SVM over linear kernel over preprocessed features"<<std::endl;
svm2->train();
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
printf("num_sv:%d b:%f\n", svm2->get_num_support_vectors(), svm2->get_bias());
a=time(NULL);
std::cout << "initializing shogun test feature again"<<std::endl;
CDenseFeatures<float64_t>* featureste2 = new CDenseFeatures<float64_t>(feature_cache);
SG_REF(featureste2);
featureste2->set_feature_matrix(featte2, dims, numte);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// ************************************************************
// use preprocessor
// **************************************************************
CRandomFourierGaussPreproc *rfgauss2=new CRandomFourierGaussPreproc;
SG_REF(rfgauss2);
rfgauss2->get_io()->set_loglevel(MSG_DEBUG);
// add preprocessor
featureste2->add_preproc(rfgauss);
// apply preprocessor
a=time(NULL);
std::cout << "applying same preprocessor to test feature"<<std::endl;
featureste2->apply_preproc();
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
//std::cout << "computing linear test kernel over preprocessed features"<<std::endl;
CLinearKernel* kernelte2 = new CLinearKernel();
SG_REF(kernelte2);
kernelte2->init(featurestr2, featureste2);
//std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
svm2->set_kernel(kernelte2);
a=time(NULL);
std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
std::vector<float64_t> scoreste2(numte);
float64_t err2=0;
for(int32_t i=0; i< numte ;++i)
{
scoreste2[i]=svm2->classify_example(i);
if(scoreste2[i]*labte[i]<0)
{
err2+=1.0/numte;
}
}
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
std::cout << "pausing 12 seconds"<<std::endl;
sleep(12);
// ************************************************************
// compare results
// **************************************************************
int32_t num_labeldiffs=0;
float64_t avg_scorediff=0;
for(int32_t i=0; i< numte ;++i)
{
if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste2[i]))
{
++num_labeldiffs;
}
avg_scorediff+=CMath::abs(scoreste1[i]-scoreste2[i])/numte;
std::cout<< "at sample i"<< i <<" label 1= " << CMath::sign(scoreste1[i]) <<" label 2= " << CMath::sign(scoreste2[i])<< " scorediff " << scoreste1[i] << " - " <<scoreste2[i] <<" = " << CMath::abs(scoreste1[i]-scoreste2[i])<<std::endl;
}
std::cout << "usedwidth for rbf kernel"<< kerneltr1->get_width() << " " << kernelte1->get_width()<<std::endl;
std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err2<<std::endl;
a=time(NULL);
std::cout << "computing effective kernel widths (means of inner distances)"<<std::endl;
int32_t m, n;
float64_t * kertr1;
kerneltr1->get_kernel_matrix ( &kertr1, &m, &n);
std::cout << "kernel size "<< m << " "<< n <<std::endl;
float64_t avgdist1=0;
for(int i=0; i<m ;++i)
{
for(int l=0; l<i ;++l)
{
avgdist1+= -CMath::log(kertr1[i+l*m])*2.0/m/(m+1.0);
}
}
float64_t * kertr2;
kerneltr2->get_kernel_matrix (&kertr2,&m, &n);
float64_t avgdist2=0;
float64_t numnegratio=0;
for(int i=0; i<m ;++i)
{
for(int l=0; l<i ;++l)
{
if(kertr2[i+l*m]<=0)
{
numnegratio+=2.0/m/(m+1.0);
}
else
{
avgdist2+= -CMath::log(std::max(kertr2[i+l*m],1e-10))*2.0/m/(m+1.0);
}
}
}
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
std::cout << "effective kernel width for gaussian kernel and RFgauss "<< avgdist1 << " " <<avgdist2/(1.0-numnegratio) << std::endl<< " numnegratio (negative entries in RFgauss approx kernel)"<< numnegratio<<std::endl;
// **********************************************
// now the same with a new preprocessor to show the usage of set_randomcoefficients
// ********************************************8
CDenseFeatures<float64_t>* featureste3 = new CDenseFeatures<float64_t>(feature_cache);
SG_REF(featureste3);
featureste3->set_feature_matrix(featte3, dims, numte);
std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
// ************************************************************
// use preprocessor
// **************************************************************
rfgauss2->set_randomcoefficients(
randomcoeff_additive2,
randomcoeff_multiplicative2,
dim_feature_space2, dim_input_space2, kernelwidth2);
// add preprocessor
featureste3->add_preproc(rfgauss2);
// apply preprocessor
a=time(NULL);
std::cout << "applying same preprocessor to test feature"<<std::endl;
featureste3->apply_preproc();
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
//std::cout << "computing linear test kernel over preprocessed features"<<std::endl;
CLinearKernel* kernelte3 = new CLinearKernel();
SG_REF(kernelte3);
kernelte2->init(featurestr2, featureste3);
//std::cout << "finished"<<std::endl;
//b=time(NULL);
//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
svm2->set_kernel(kernelte3);
a=time(NULL);
std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
std::vector<float64_t> scoreste3(numte);
float64_t err3=0;
for(int32_t i=0; i< numte ;++i)
{
scoreste3[i]=svm2->classify_example(i);
if(scoreste3[i]*labte[i]<0)
{
err3+=1.0/numte;
}
}
std::cout << "finished"<<std::endl;
b=time(NULL);
std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
std::cout << "pausing 12 seconds"<<std::endl;
sleep(12);
// ************************************************************
// compare results
// **************************************************************
num_labeldiffs=0;
avg_scorediff=0;
for(int32_t i=0; i< numte ;++i)
{
if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste3[i]))
{
++num_labeldiffs;
}
avg_scorediff+=CMath::abs(scoreste1[i]-scoreste3[i])/numte;
std::cout<< "at sample i"<< i <<" label 1= " << CMath::sign(scoreste1[i]) <<" label 2= " << CMath::sign(scoreste3[i])<< " scorediff " << scoreste1[i] << " - " <<scoreste3[i] <<" = " << CMath::abs(scoreste1[i]-scoreste3[i])<<std::endl;
}
std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err3<<std::endl;
SG_FREE(randomcoeff_additive2);
SG_FREE(randomcoeff_multiplicative2);
SG_FREE(labtr);
SG_FREE(labte);
SG_FREE(kertr1);
SG_FREE(kertr2);
SG_UNREF(labelstr);
SG_UNREF(kerneltr1);
SG_UNREF(kerneltr2);
SG_UNREF(kernelte1);
SG_UNREF(kernelte2);
SG_UNREF(kernelte3);
SG_UNREF(featurestr1);
SG_UNREF(featurestr2);
SG_UNREF(featureste1);
SG_UNREF(featureste2);
SG_UNREF(featureste3);
SG_UNREF(svm1);
SG_UNREF(svm2);
SG_UNREF(rfgauss);
SG_UNREF(rfgauss2);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of the Vowpal Wabbit learning algorithm.
*/
#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingDenseFeatures.h>
#include <shogun/multiclass/tree/RandomConditionalProbabilityTree.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
const char* train_file_name = "../data/7class_example4_train.dense";
const char* test_file_name = "../data/7class_example4_test.dense";
CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
SG_REF(train_file);
CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
SG_REF(train_features);
CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
cpt->set_num_passes(1);
cpt->set_features(train_features);
cpt->train();
cpt->print_tree();
CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
SG_REF(test_features);
CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
test_features->reset_stream();
SG_SPRINT("num_labels = %d\n", pred->get_num_labels());
SG_UNREF(test_features);
SG_UNREF(test_file);
test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
SG_REF(test_features);
CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
test_features->start_parser();
for (int32_t i=0; i < pred->get_num_labels(); ++i)
{
test_features->get_next_example();
gnd->set_int_label(i, test_features->get_label());
test_features->release_example();
}
test_features->end_parser();
int32_t n_correct = 0;
for (index_t i=0; i < pred->get_num_labels(); ++i)
{
if (pred->get_int_label(i) == gnd->get_int_label(i))
n_correct++;
//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
}
SG_SPRINT("\n");
SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());
SG_UNREF(train_features);
SG_UNREF(test_features);
SG_UNREF(train_file);
SG_UNREF(test_file);
SG_UNREF(cpt);
SG_UNREF(pred);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Evangelos Anagnostopoulos
* Copyright (C) 2013 Evangelos Anagnostopoulos
*
* This example demonstrates the use of the Random Fourier Dot Features with
* a linear classifier.
*/
#include <shogun/base/init.h>
#include <shogun/features/RandomFourierDotFeatures.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/classifier/svm/SVMOcas.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/evaluation/PRCEvaluation.h>
using namespace shogun;
void load_data(int32_t num_dim, int32_t num_vecs,
CDenseFeatures<float64_t>*& feats, CBinaryLabels*& labels)
{
SGMatrix<float64_t> mat(num_dim, num_vecs);
SGVector<float64_t> labs(num_vecs);
for (index_t i=0; i<num_vecs; i++)
{
for (index_t j=0; j<num_dim; j++)
{
if ((i+j)%2==0)
{
labs[i] = -1;
mat(j,i) = CMath::random(0,1) + 0.5;
}
else
{
labs[i] = 1;
mat(j,i) = CMath::random(0,1) - 0.5;
}
}
}
feats = new CDenseFeatures<float64_t>(mat);
labels = new CBinaryLabels(labs);
}
int main(int argv, char** argc)
{
init_shogun_with_defaults();
int32_t num_dim = 100;
int32_t num_vecs = 10000;
CDenseFeatures<float64_t>* dense_feats = 0;
CBinaryLabels* labels = 0;
load_data(num_dim, num_vecs, dense_feats, labels);
/** Specifying the kernel parameter for the Gaussian approximation of RFFeatures,
* as specified in its documentation in KernelName.
* We set the kernel width of the Gaussian kernel we are approximating to 8.
*/
SGVector<float64_t> params(1);
params[0] = 8;
/** Specifying the number of samples for the RFFeatures */
int32_t D = 300;
/** Creating a new RandomFourierDotFeatures object, that will work on
* the data that we created before, will use D number of samples and
* will generate parameters for a Gaussian Kernel approximation of
* width given in params
*/
CRandomFourierDotFeatures* rf_feats = new CRandomFourierDotFeatures(
dense_feats, D, KernelName::GAUSSIAN, params);
/** Now the previous RFFeatures object can be used with a linear
* classifier
*/
//CLibLinear* lin_svm = new CLibLinear(C, r_feats, labels);
float64_t C = 0.1;
float64_t epsilon = 0.01;
CSVMOcas* lin_svm = new CSVMOcas(C, rf_feats, labels);
lin_svm->set_epsilon(epsilon);
lin_svm->train();
CBinaryLabels* predicted = CLabelsFactory::to_binary(lin_svm->apply());
CPRCEvaluation* evaluator = new CPRCEvaluation();
float64_t auPRC = evaluator->evaluate(predicted, labels);
//SG_SPRINT("Training auPRC = %f\n", auPRC);
SG_UNREF(lin_svm);
SG_UNREF(predicted);
exit_shogun();
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Jacob Walker
*/
#include <shogun/lib/config.h>
// temporally disabled, since API was changed
#if defined(HAVE_NLOPT) && 0
#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/LinearARDKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
using namespace shogun;
int32_t num_vectors=4;
int32_t dim_vectors=3;
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
CRegressionLabels* labels)
{
/*Fill Matrices with random nonsense*/
train[0] = -1;
train[1] = -1;
train[2] = -1;
train[3] = 1;
train[4] = 1;
train[5] = 1;
train[6] = -10;
train[7] = -10;
train[8] = -10;
train[9] = 3;
train[10] = 2;
train[11] = 1;
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
test[i]=i*sin(i)*.96;
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
{
if(i%2 == 0) labels->set_label(i, 1);
else labels->set_label(i, -1);
}
}
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
CLikelihoodModel* lik, CKernel* kernel,
SGVector<float64_t>& weights)
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1 =
new CModelSelectionParameters("inference_method", inf);
root->append_child(c1);
CModelSelectionParameters* c2 =
new CModelSelectionParameters("likelihood_model", lik);
c1->append_child(c2);
CModelSelectionParameters* c3=new CModelSelectionParameters("sigma");
c2->append_child(c3);
c3->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c4=new CModelSelectionParameters("scale");
c1->append_child(c4);
c4->build_values(1.0, 1.0, R_LINEAR);
CModelSelectionParameters* c5 =
new CModelSelectionParameters("kernel", kernel);
c1->append_child(c5);
CModelSelectionParameters* c6 =
new CModelSelectionParameters("weights");
c5->append_child(c6);
c6->build_values_sgvector(0.001, 4.0, R_LINEAR, &weights);
return root;
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
/* create some data and labels */
SGMatrix<float64_t> matrix =
SGMatrix<float64_t>(dim_vectors, num_vectors);
SGVector<float64_t> weights(dim_vectors);
SGMatrix<float64_t> matrix2 =
SGMatrix<float64_t>(dim_vectors, num_vectors);
CRegressionLabels* labels=new CRegressionLabels(num_vectors);
build_matrices(matrix2, matrix, labels);
/* create training features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
/* create testing features */
CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
features2->set_feature_matrix(matrix2);
SG_REF(features);
SG_REF(features2);
SG_REF(labels);
/*Allocate our Kernel*/
CLinearARDKernel* test_kernel = new CLinearARDKernel(10);
test_kernel->init(features, features);
/*Allocate our mean function*/
CZeroMean* mean = new CZeroMean();
/*Allocate our likelihood function*/
CGaussianLikelihood* lik = new CGaussianLikelihood();
/*Allocate our inference method*/
CExactInferenceMethod* inf =
new CExactInferenceMethod(test_kernel,
features, mean, labels, lik);
SG_REF(inf);
/*Finally use these to allocate the Gaussian Process Object*/
CGaussianProcessRegression* gp =
new CGaussianProcessRegression(inf);
SG_REF(gp);
/*Build the parameter tree for model selection*/
CModelSelectionParameters* root = build_tree(inf, lik, test_kernel,
weights);
/*Criterion for gradient search*/
CGradientCriterion* crit = new CGradientCriterion();
/*This will evaluate our inference method for its derivatives*/
CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
crit);
grad->set_function(inf);
gp->print_modsel_params();
root->print_tree();
/* handles all of the above structures in memory */
CGradientModelSelection* grad_search=new CGradientModelSelection(
root, grad);
/* set autolocking to false to get rid of warnings */
grad->set_autolock(false);
/*Search for best parameters*/
CParameterCombination* best_combination=grad_search->select_model(true);
/*Output all the results and information*/
if (best_combination)
{
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(gp);
}
CGradientResult* result=(CGradientResult*)grad->evaluate();
if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
SG_SERROR("Evaluation result not a GradientEvaluationResult!");
result->print_result();
SGVector<float64_t> alpha = inf->get_alpha();
SGVector<float64_t> labe = labels->get_labels();
SGVector<float64_t> diagonal = inf->get_diagonal_vector();
SGMatrix<float64_t> cholesky = inf->get_cholesky();
CRegressionLabels* predictions=gp->apply_regression(features);
SGVector<float64_t> variance_vector=gp->get_variance_vector(features);
alpha.display_vector("Alpha Vector");
labe.display_vector("Labels");
diagonal.display_vector("sW Matrix");
variance_vector.display_vector("Predicted Variances");
predictions->get_labels().display_vector("Mean Predictions");
cholesky.display_matrix("Cholesky Matrix L");
matrix.display_matrix("Training Features");
matrix2.display_matrix("Testing Features");
/*free memory*/
SG_UNREF(features);
SG_UNREF(features2);
SG_UNREF(predictions);
SG_UNREF(labels);
SG_UNREF(inf);
SG_UNREF(gp);
SG_UNREF(grad_search);
SG_UNREF(best_combination);
SG_UNREF(result);
exit_shogun();
return 0;
}
#else
int main(int argc, char **argv)
{
return 0;
}
#endif
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Jacob Walker
*/
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/config.h>
// temporally disabled, since API was changed
#if defined(HAVE_NLOPT) && 0
#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/FITCInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
using namespace shogun;
int32_t num_vectors=4;
int32_t dim_vectors=3;
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
CRegressionLabels* labels)
{
/*Fill Matrices with random nonsense*/
train[0] = -1;
train[1] = -1;
train[2] = -1;
train[3] = 1;
train[4] = 1;
train[5] = 1;
train[6] = -10;
train[7] = -10;
train[8] = -10;
train[9] = 3;
train[10] = 2;
train[11] = 1;
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
test[i]=i*sin(i)*.96;
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
{
if(i%2 == 0) labels->set_label(i, 1);
else labels->set_label(i, -1);
}
}
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
CLikelihoodModel* lik, CKernel* kernel)
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1 =
new CModelSelectionParameters("inference_method", inf);
root->append_child(c1);
CModelSelectionParameters* c2 = new CModelSelectionParameters("scale");
c1 ->append_child(c2);
c2->build_values(0.01, 4.0, R_LINEAR);
CModelSelectionParameters* c3 =
new CModelSelectionParameters("likelihood_model", lik);
c1->append_child(c3);
CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
c3->append_child(c4);
c4->build_values(0.01, 4.0, R_LINEAR);
CModelSelectionParameters* c5 =
new CModelSelectionParameters("kernel", kernel);
c1->append_child(c5);
CModelSelectionParameters* c6 =
new CModelSelectionParameters("width");
c5->append_child(c6);
c6->build_values(0.01, 4.0, R_LINEAR);
return root;
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
/* create some data and labels */
SGMatrix<float64_t> matrix =
SGMatrix<float64_t>(dim_vectors, num_vectors);
SGMatrix<float64_t> matrix2 =
SGMatrix<float64_t>(dim_vectors, num_vectors);
CRegressionLabels* labels=new CRegressionLabels(num_vectors);
build_matrices(matrix2, matrix, labels);
/* create training features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
/* create testing features */
CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
features2->set_feature_matrix(matrix2);
SG_REF(labels);
/*Allocate our Kernel*/
CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
test_kernel->init(features, features);
/*Allocate our mean function*/
CZeroMean* mean = new CZeroMean();
/*Allocate our likelihood function*/
CGaussianLikelihood* lik = new CGaussianLikelihood();
//SG_SPRINT("features2 bef inf rc= %d\n",features2->ref_count());
/*Allocate our inference method*/
CFITCInferenceMethod* inf =
new CFITCInferenceMethod(test_kernel,
features, mean, labels, lik, features2);
//SG_SPRINT("features2 aft inf rc= %d\n",features2->ref_count());
SG_REF(inf);
/*Finally use these to allocate the Gaussian Process Object*/
CGaussianProcessRegression* gp =
new CGaussianProcessRegression(inf);
SG_REF(gp);
/*Build the parameter tree for model selection*/
CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
/*Criterion for gradient search*/
CGradientCriterion* crit = new CGradientCriterion();
/*This will evaluate our inference method for its derivatives*/
CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
crit);
grad->set_function(inf);
gp->print_modsel_params();
root->print_tree();
/* handles all of the above structures in memory */
CGradientModelSelection* grad_search=new CGradientModelSelection(
root, grad);
/* set autolocking to false to get rid of warnings */
grad->set_autolock(false);
/*Search for best parameters*/
CParameterCombination* best_combination=grad_search->select_model(true);
/*Output all the results and information*/
if (best_combination)
{
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(gp);
}
CGradientResult* result=(CGradientResult*)grad->evaluate();
if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
SG_SERROR("Evaluation result not a GradientEvaluationResult!");
result->print_result();
SGVector<float64_t> alpha = inf->get_alpha();
SGVector<float64_t> labe = labels->get_labels();
SGVector<float64_t> diagonal = inf->get_diagonal_vector();
SGMatrix<float64_t> cholesky = inf->get_cholesky();
CRegressionLabels* predictions=gp->apply_regression(features);
SGVector<float64_t> variance_vector=gp->get_variance_vector(features);
alpha.display_vector("Alpha Vector");
labe.display_vector("Labels");
diagonal.display_vector("sW Matrix");
variance_vector.display_vector("Predicted Variances");
predictions->get_labels().display_vector("Mean Predictions");
cholesky.display_matrix("Cholesky Matrix L");
matrix.display_matrix("Training Features");
matrix2.display_matrix("Testing Features");
/*free memory*/
SG_UNREF(predictions);
SG_UNREF(labels);
SG_UNREF(inf);
SG_UNREF(gp);
SG_UNREF(grad_search);
SG_UNREF(best_combination);
SG_UNREF(result);
exit_shogun();
return 0;
}
#else
int main(int argc, char **argv)
{
return 0;
}
#endif
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Jacob Walker
*/
#include <shogun/lib/config.h>
// temporally disabled, since API was changed
#if defined(HAVE_NLOPT) && 0
#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/LaplacianInferenceMethod.h>
#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
using namespace shogun;
int32_t num_vectors=4;
int32_t dim_vectors=3;
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
CRegressionLabels* labels)
{
/*Fill Matrices with random nonsense*/
train[0] = -1;
train[1] = -1;
train[2] = -1;
train[3] = 1;
train[4] = 1;
train[5] = 1;
train[6] = -10;
train[7] = -10;
train[8] = -10;
train[9] = 3;
train[10] = 2;
train[11] = 1;
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
test[i]=i*sin(i)*.96;
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
{
if(i%2 == 0) labels->set_label(i, 1);
else labels->set_label(i, -1);
}
}
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
CLikelihoodModel* lik, CKernel* kernel)
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1 =
new CModelSelectionParameters("inference_method", inf);
root->append_child(c1);
CModelSelectionParameters* c2 = new CModelSelectionParameters("scale");
c1 ->append_child(c2);
c2->build_values(0.5, 4.0, R_LINEAR);
CModelSelectionParameters* c3 =
new CModelSelectionParameters("likelihood_model", lik);
c1->append_child(c3);
CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
c3->append_child(c4);
c4->build_values(0.01, 4.0, R_LINEAR);
CModelSelectionParameters* c43=new CModelSelectionParameters("df");
c3->append_child(c43);
c43->build_values(500.0, 1000.0, R_LINEAR);
CModelSelectionParameters* c5 =
new CModelSelectionParameters("kernel", kernel);
c1->append_child(c5);
CModelSelectionParameters* c6 =
new CModelSelectionParameters("width");
c5->append_child(c6);
c6->build_values(0.01, 4.0, R_LINEAR);
return root;
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
/* create some data and labels */
SGMatrix<float64_t> matrix =
SGMatrix<float64_t>(dim_vectors, num_vectors);
SGMatrix<float64_t> matrix2 =
SGMatrix<float64_t>(dim_vectors, num_vectors);
CRegressionLabels* labels=new CRegressionLabels(num_vectors);
build_matrices(matrix2, matrix, labels);
/* create training features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
/* create testing features */
CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
features2->set_feature_matrix(matrix2);
SG_REF(features);
SG_REF(features2);
SG_REF(labels);
/*Allocate our Kernel*/
CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
test_kernel->init(features, features);
/*Allocate our mean function*/
CZeroMean* mean = new CZeroMean();
/*Allocate our likelihood function*/
CStudentsTLikelihood* lik = new CStudentsTLikelihood();
/*Allocate our inference method*/
CLaplacianInferenceMethod* inf =
new CLaplacianInferenceMethod(test_kernel,
features, mean, labels, lik);
SG_REF(inf);
/*Finally use these to allocate the Gaussian Process Object*/
CGaussianProcessRegression* gp =
new CGaussianProcessRegression(inf);
SG_REF(gp);
/*Build the parameter tree for model selection*/
CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
/*Criterion for gradient search*/
CGradientCriterion* crit = new CGradientCriterion();
/*This will evaluate our inference method for its derivatives*/
CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
crit);
grad->set_function(inf);
gp->print_modsel_params();
root->print_tree();
/* handles all of the above structures in memory */
CGradientModelSelection* grad_search=new CGradientModelSelection(
root, grad);
/* set autolocking to false to get rid of warnings */
grad->set_autolock(false);
/*Search for best parameters*/
CParameterCombination* best_combination=grad_search->select_model(true);
/*Output all the results and information*/
if (best_combination)
{
SG_SPRINT("best parameter(s):\n");
best_combination->print_tree();
best_combination->apply_to_machine(gp);
}
CGradientResult* result=(CGradientResult*)grad->evaluate();
if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
SG_SERROR("Evaluation result not a GradientEvaluationResult!");
result->print_result();
SGVector<float64_t> alpha = inf->get_alpha();
SGVector<float64_t> labe = labels->get_labels();
SGVector<float64_t> diagonal = inf->get_diagonal_vector();
SGMatrix<float64_t> cholesky = inf->get_cholesky();
CRegressionLabels* predictions=gp->apply_regression(features);
SGVector<float64_t> variance_vector=gp->get_variance_vector(features);
alpha.display_vector("Alpha Vector");
labe.display_vector("Labels");
diagonal.display_vector("sW Matrix");
variance_vector.display_vector("Predicted Variances");
predictions->get_labels().display_vector("Mean Predictions");
cholesky.display_matrix("Cholesky Matrix L");
matrix.display_matrix("Training Features");
matrix2.display_matrix("Testing Features");
/*free memory*/
SG_UNREF(features);
SG_UNREF(features2);
SG_UNREF(predictions);
SG_UNREF(labels);
SG_UNREF(inf);
SG_UNREF(gp);
SG_UNREF(grad_search);
SG_UNREF(best_combination);
SG_UNREF(result);
exit_shogun();
return 0;
}
#else
int main(int argc, char **argv)
{
return 0;
}
#endif
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Jacob Walker
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#if defined(HAVE_NLOPT)
#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
#include <shogun/kernel/ProductKernel.h>
using namespace shogun;
int32_t num_vectors=4;
int32_t dim_vectors=3;
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
CRegressionLabels* labels)
{
/*Fill Matrices with random nonsense*/
train[0] = -1;
train[1] = -1;
train[2] = -1;
train[3] = 1;
train[4] = 1;
train[5] = 1;
train[6] = -10;
train[7] = -10;
train[8] = -10;
train[9] = 3;
train[10] = 2;
train[11] = 1;
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
test[i]=i*sin(i)*.96;
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
{
if(i%2 == 0) labels->set_label(i, 1);
else labels->set_label(i, -1);
}
}
/* HEIKO FIXME
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
CLikelihoodModel* lik, CProductKernel* kernel)
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1 =
new CModelSelectionParameters("inference_method", inf);
root->append_child(c1);
CModelSelectionParameters* c2 = new CModelSelectionParameters("scale");
c1 ->append_child(c2);
c2->build_values(0.99, 1.01, R_LINEAR);
CModelSelectionParameters* c3 =
new CModelSelectionParameters("likelihood_model", lik);
c1->append_child(c3);
CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
c3->append_child(c4);
c4->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c5 =
new CModelSelectionParameters("kernel", kernel);
c1->append_child(c5);
CList* list = kernel->get_list();
CModelSelectionParameters* cc1 = new CModelSelectionParameters("kernel_list", list);
c5->append_child(cc1);
CListElement* first = NULL;
CSGObject* k = list->get_first_element(first);
SG_UNREF(k);
SG_REF(first);
CModelSelectionParameters* cc2 = new CModelSelectionParameters("first", first);
cc1->append_child(cc2);
CKernel* sub_kernel1 = kernel->get_kernel(0);
CModelSelectionParameters* cc3 = new CModelSelectionParameters("data", sub_kernel1);
cc2->append_child(cc3);
SG_UNREF(sub_kernel1);
CListElement* second = first;
k = list->get_next_element(second);
SG_UNREF(k);
SG_REF(second);
CModelSelectionParameters* cc4 = new CModelSelectionParameters("next", second);
cc2->append_child(cc4);
CKernel* sub_kernel2 = kernel->get_kernel(1);
CModelSelectionParameters* cc5 = new CModelSelectionParameters("data", sub_kernel2);
cc4->append_child(cc5);
SG_UNREF(sub_kernel2);
CListElement* third = second;
k = list->get_next_element(third);
SG_UNREF(k);
SG_REF(third);
CModelSelectionParameters* cc6 = new CModelSelectionParameters("next", third);
cc4->append_child(cc6);
CKernel* sub_kernel3 = kernel->get_kernel(2);
CModelSelectionParameters* cc7 = new CModelSelectionParameters("data", sub_kernel3);
cc6->append_child(cc7);
SG_UNREF(sub_kernel3);
CModelSelectionParameters* c6 =
new CModelSelectionParameters("width");
cc3->append_child(c6);
c6->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c66 =
new CModelSelectionParameters("combined_kernel_weight");
cc3->append_child(c66);
c66->build_values(0.001, 1.0, R_LINEAR);
CModelSelectionParameters* c7 =
new CModelSelectionParameters("width");
cc5->append_child(c7);
c7->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c77 =
new CModelSelectionParameters("combined_kernel_weight");
cc5->append_child(c77);
c77->build_values(0.001, 1.0, R_LINEAR);
CModelSelectionParameters* c8 =
new CModelSelectionParameters("width");
cc7->append_child(c8);
c8->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c88 =
new CModelSelectionParameters("combined_kernel_weight");
cc7->append_child(c88);
c88->build_values(0.001, 1.0, R_LINEAR);
SG_UNREF(list);
return root;
}
*/
int main(int argc, char **argv)
{
init_shogun_with_defaults();
/* create some data and labels */
SGMatrix<float64_t> matrix =
SGMatrix<float64_t>(dim_vectors, num_vectors);
SGMatrix<float64_t> matrix2 =
SGMatrix<float64_t>(dim_vectors, num_vectors);
CRegressionLabels* labels=new CRegressionLabels(num_vectors);
build_matrices(matrix2, matrix, labels);
/* create training features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
CProductKernel* test_kernel = new CProductKernel();
CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2);
CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2);
CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2);
test_kernel->append_kernel(sub_kernel1);
test_kernel->append_kernel(sub_kernel2);
test_kernel->append_kernel(sub_kernel3);
SG_REF(comb_features);
SG_REF(labels);
/*Allocate our Mean Function*/
CZeroMean* mean = new CZeroMean();
/*Allocate our Likelihood Model*/
CGaussianLikelihood* lik = new CGaussianLikelihood();
/*Allocate our inference method*/
CExactInferenceMethod* inf =
new CExactInferenceMethod(test_kernel,
comb_features, mean, labels, lik);
SG_REF(inf);
/*Finally use these to allocate the Gaussian Process Object*/
CGaussianProcessRegression* gp =
new CGaussianProcessRegression(inf);
SG_REF(gp);
//CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
//
///*Criterion for gradient search*/
//CGradientCriterion* crit = new CGradientCriterion();
///*This will evaluate our inference method for its derivatives*/
//CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features, labels,
// crit);
//grad->set_function(inf);
//gp->print_modsel_params();
//root->print_tree();
///* handles all of the above structures in memory */
//CGradientModelSelection* grad_search=new CGradientModelSelection(
// root, grad);
///* set autolocking to false to get rid of warnings */
//grad->set_autolock(false);
///*Search for best parameters*/
//CParameterCombination* best_combination=grad_search->select_model(true);
///*Output all the results and information*/
//if (best_combination)
//{
// SG_SPRINT("best parameter(s):\n");
// best_combination->print_tree();
// best_combination->apply_to_machine(gp);
//}
//CGradientResult* result=(CGradientResult*)grad->evaluate();
//if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
// SG_SERROR("Evaluation result not a GradientEvaluationResult!");
//result->print_result();
//SGVector<float64_t> alpha = inf->get_alpha();
//SGVector<float64_t> labe = labels->get_labels();
//SGVector<float64_t> diagonal = inf->get_diagonal_vector();
//SGMatrix<float64_t> cholesky = inf->get_cholesky();
//gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
//CRegressionLabels* covariance = gp->apply_regression(comb_features);
//gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
//
//CRegressionLabels* predictions = gp->apply_regression();
//alpha.display_vector("Alpha Vector");
//labe.display_vector("Labels");
//diagonal.display_vector("sW Matrix");
//covariance->get_labels().display_vector("Predicted Variances");
//predictions->get_labels().display_vector("Mean Predictions");
//cholesky.display_matrix("Cholesky Matrix L");
//matrix.display_matrix("Training Features");
//matrix2.display_matrix("Testing Features");
///*free memory*/
//SG_UNREF(predictions);
//SG_UNREF(covariance);
SG_UNREF(labels);
SG_UNREF(comb_features);
SG_UNREF(inf);
SG_UNREF(gp);
//SG_UNREF(grad_search);
//SG_UNREF(best_combination);
//SG_UNREF(result);
exit_shogun();
return 0;
}
#else
int main(int argc, char **argv)
{
return 0;
}
#endif
#else //USE_GPL_SHOGUN
int main(int argc, char **argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/config.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/base/init.h>
using namespace shogun;
void test()
{
/* create some easy regression data: 1d noisy sine wave */
index_t n=100;
float64_t x_range=6;
SGMatrix<float64_t> X(1, n);
SGMatrix<float64_t> X_test(1, n);
SGVector<float64_t> Y(n);
for (index_t i=0; i<n; ++i)
{
X[i]=CMath::random(0.0, x_range);
X_test[i]=(float64_t)i / n*x_range;
Y[i]=CMath::sin(X[i]);
}
/* shogun representation */
CDenseFeatures<float64_t>* feat_train=new CDenseFeatures<float64_t>(X);
CDenseFeatures<float64_t>* feat_test=new CDenseFeatures<float64_t>(X_test);
CRegressionLabels* label_train=new CRegressionLabels(Y);
/* specity GPR with exact inference */
float64_t sigma=1;
float64_t shogun_sigma=sigma*sigma*2;
CGaussianKernel* kernel=new CGaussianKernel(10, shogun_sigma);
CZeroMean* mean=new CZeroMean();
CGaussianLikelihood* lik=new CGaussianLikelihood();
lik->set_sigma(1);
CExactInferenceMethod* inf=new CExactInferenceMethod(kernel, feat_train,
mean, label_train, lik);
CGaussianProcessRegression* gpr=new CGaussianProcessRegression(inf);
/* perform inference */
CRegressionLabels* predictions=gpr->apply_regression(feat_test);
predictions->get_labels().display_vector("predictions");
SG_UNREF(predictions);
SG_UNREF(gpr);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test();
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Jacob Walker
*/
#ifdef USE_GPL_SHOGUN
#include <shogun/lib/config.h>
#if defined(HAVE_NLOPT)
#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
#include <shogun/kernel/CombinedKernel.h>
using namespace shogun;
int32_t num_vectors=4;
int32_t dim_vectors=3;
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
CRegressionLabels* labels)
{
/*Fill Matrices with random nonsense*/
train[0] = -1;
train[1] = -1;
train[2] = -1;
train[3] = 1;
train[4] = 1;
train[5] = 1;
train[6] = -10;
train[7] = -10;
train[8] = -10;
train[9] = 3;
train[10] = 2;
train[11] = 1;
for (int32_t i=0; i<num_vectors*dim_vectors; i++)
test[i]=i*sin(i)*.96;
/* create labels, two classes */
for (index_t i=0; i<num_vectors; ++i)
{
if(i%2 == 0) labels->set_label(i, 1);
else labels->set_label(i, -1);
}
}
/* HEIKO FIXME
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
CLikelihoodModel* lik, CCombinedKernel* kernel)
{
CModelSelectionParameters* root=new CModelSelectionParameters();
CModelSelectionParameters* c1 =
new CModelSelectionParameters("inference_method", inf);
root->append_child(c1);
CModelSelectionParameters* c2 = new CModelSelectionParameters("scale");
c1 ->append_child(c2);
c2->build_values(0.99, 1.01, R_LINEAR);
CModelSelectionParameters* c3 =
new CModelSelectionParameters("likelihood_model", lik);
c1->append_child(c3);
CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
c3->append_child(c4);
c4->build_values(0.001, 1.0, R_LINEAR);
CModelSelectionParameters* c5 =
new CModelSelectionParameters("kernel", kernel);
c1->append_child(c5);
CList* list = kernel->get_list();
CModelSelectionParameters* cc1 = new CModelSelectionParameters("kernel_list", list);
c5->append_child(cc1);
CListElement* first = NULL;
CSGObject* k = list->get_first_element(first);
SG_UNREF(k);
SG_REF(first);
CModelSelectionParameters* cc2 = new CModelSelectionParameters("first", first);
cc1->append_child(cc2);
CKernel* sub_kernel1 = kernel->get_kernel(0);
CModelSelectionParameters* cc3 = new CModelSelectionParameters("data", sub_kernel1);
cc2->append_child(cc3);
SG_UNREF(sub_kernel1);
CListElement* second = first;
k = list->get_next_element(second);
SG_UNREF(k);
SG_REF(second);
CModelSelectionParameters* cc4 = new CModelSelectionParameters("next", second);
cc2->append_child(cc4);
CKernel* sub_kernel2 = kernel->get_kernel(1);
CModelSelectionParameters* cc5 = new CModelSelectionParameters("data", sub_kernel2);
cc4->append_child(cc5);
SG_UNREF(sub_kernel2);
CListElement* third = second;
k = list->get_next_element(third);
SG_UNREF(k);
SG_REF(third);
CModelSelectionParameters* cc6 = new CModelSelectionParameters("next", third);
cc4->append_child(cc6);
CKernel* sub_kernel3 = kernel->get_kernel(2);
CModelSelectionParameters* cc7 = new CModelSelectionParameters("data", sub_kernel3);
cc6->append_child(cc7);
SG_UNREF(sub_kernel3);
CModelSelectionParameters* c6 =
new CModelSelectionParameters("width");
cc3->append_child(c6);
c6->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c66 =
new CModelSelectionParameters("combined_kernel_weight");
cc3->append_child(c66);
c66->build_values(0.001, 1.0, R_LINEAR);
CModelSelectionParameters* c7 =
new CModelSelectionParameters("width");
cc5->append_child(c7);
c7->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c77 =
new CModelSelectionParameters("combined_kernel_weight");
cc5->append_child(c77);
c77->build_values(0.001, 1.0, R_LINEAR);
CModelSelectionParameters* c8 =
new CModelSelectionParameters("width");
cc7->append_child(c8);
c8->build_values(1.0, 4.0, R_LINEAR);
CModelSelectionParameters* c88 =
new CModelSelectionParameters("combined_kernel_weight");
cc7->append_child(c88);
c88->build_values(0.001, 1.0, R_LINEAR);
SG_UNREF(list);
return root;
}
*/
int main(int argc, char **argv)
{
init_shogun_with_defaults();
/* create some data and labels */
SGMatrix<float64_t> matrix =
SGMatrix<float64_t>(dim_vectors, num_vectors);
SGMatrix<float64_t> matrix2 =
SGMatrix<float64_t>(dim_vectors, num_vectors);
CRegressionLabels* labels=new CRegressionLabels(num_vectors);
build_matrices(matrix2, matrix, labels);
/* create training features */
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
features->set_feature_matrix(matrix);
CCombinedFeatures* comb_features=new CCombinedFeatures();
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
comb_features->append_feature_obj(features);
CCombinedKernel* test_kernel = new CCombinedKernel();
CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2);
CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2);
CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2);
test_kernel->append_kernel(sub_kernel1);
test_kernel->append_kernel(sub_kernel2);
test_kernel->append_kernel(sub_kernel3);
SG_REF(comb_features);
SG_REF(labels);
/*Allocate our Mean Function*/
CZeroMean* mean = new CZeroMean();
/*Allocate our Likelihood Model*/
CGaussianLikelihood* lik = new CGaussianLikelihood();
/*Allocate our inference method*/
CExactInferenceMethod* inf =
new CExactInferenceMethod(test_kernel,
comb_features, mean, labels, lik);
SG_REF(inf);
/*Finally use these to allocate the Gaussian Process Object*/
CGaussianProcessRegression* gp =
new CGaussianProcessRegression(inf);
SG_REF(gp);
//CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
//
///*Criterion for gradient search*/
//CGradientCriterion* crit = new CGradientCriterion();
///*This will evaluate our inference method for its derivatives*/
//CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features, labels,
// crit);
//grad->set_function(inf);
//gp->print_modsel_params();
//root->print_tree();
///* handles all of the above structures in memory */
//CGradientModelSelection* grad_search=new CGradientModelSelection(
// root, grad);
///* set autolocking to false to get rid of warnings */
//grad->set_autolock(false);
///*Search for best parameters*/
//CParameterCombination* best_combination=grad_search->select_model(true);
///*Output all the results and information*/
//if (best_combination)
//{
// SG_SPRINT("best parameter(s):\n");
// best_combination->print_tree();
// best_combination->apply_to_machine(gp);
//}
//CGradientResult* result=(CGradientResult*)grad->evaluate();
//if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
// SG_SERROR("Evaluation result not a GradientEvaluationResult!");
//result->print_result();
//SGVector<float64_t> alpha = inf->get_alpha();
//SGVector<float64_t> labe = labels->get_labels();
//SGVector<float64_t> diagonal = inf->get_diagonal_vector();
//SGMatrix<float64_t> cholesky = inf->get_cholesky();
//gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
//CRegressionLabels* covariance = gp->apply_regression(comb_features);
//gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
//
//CRegressionLabels* predictions = gp->apply_regression();
//alpha.display_vector("Alpha Vector");
//labe.display_vector("Labels");
//diagonal.display_vector("sW Matrix");
//covariance->get_labels().display_vector("Predicted Variances");
//predictions->get_labels().display_vector("Mean Predictions");
//cholesky.display_matrix("Cholesky Matrix L");
//matrix.display_matrix("Training Features");
//matrix2.display_matrix("Testing Features");
///*free memory*/
//SG_UNREF(predictions);
//SG_UNREF(covariance);
SG_UNREF(labels);
SG_UNREF(comb_features);
SG_UNREF(inf);
SG_UNREF(gp);
//SG_UNREF(grad_search);
//SG_UNREF(best_combination);
//SG_UNREF(result);
exit_shogun();
return 0;
}
#else // HAVE_NLOPT
int main(int argc, char **argv)
{
return 0;
}
#endif // HAVE_NLOPT
#else //USE_GPL_SHOGUN
int main(int argc, char **argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/regression/svr/LibSVR.h>
#include <shogun/evaluation/MeanSquaredError.h>
using namespace shogun;
void test_libsvr()
{
const int32_t kernel_cache=0;
const float64_t rbf_width=10;
const float64_t svm_C=10;
const float64_t svm_nu=0.01;
/* create some easy regression data: 1d noisy sine wave */
index_t n=100;
float64_t x_range=6;
SGMatrix<float64_t> feat_train(1, n);
SGMatrix<float64_t> feat_test(1, n);
SGVector<float64_t> lab_train(n);
SGVector<float64_t> lab_test(n);
for (index_t i=0; i<n; ++i)
{
feat_train[i]=CMath::random(0.0, x_range);
feat_test[i]=(float64_t)i/n*x_range;
lab_train[i]=CMath::sin(feat_train[i]);
lab_test[i]=CMath::sin(feat_test[i]);
}
/* shogun representation */
CLabels* labels_train=new CRegressionLabels(lab_train);
CLabels* labels_test=new CRegressionLabels(lab_test);
CDenseFeatures<float64_t>* features_train=new CDenseFeatures<float64_t>(
feat_train);
CDenseFeatures<float64_t>* features_test=new CDenseFeatures<float64_t>(
feat_test);
CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, rbf_width);
kernel->init(features_train, features_train);
// also epsilon svr possible here
LIBSVR_SOLVER_TYPE st=LIBSVR_NU_SVR;
CLibSVR* svm=new CLibSVR(svm_C, svm_nu, kernel, labels_train, st);
svm->train();
/* predict */
CRegressionLabels* predicted_labels=CLabelsFactory::to_regression(
svm->apply(features_test));
/* evaluate */
CEvaluation* eval=new CMeanSquaredError();
SG_SPRINT("mean squared error: %f\n",
eval->evaluate(predicted_labels, labels_test));
/* clean up */
SG_UNREF(eval);
SG_UNREF(labels_test)
SG_UNREF(predicted_labels);
SG_UNREF(svm);
}
int main()
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_libsvr();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/features/DenseFeatures.h>
#include <unistd.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
class CTestClass : public CSGObject
{
public:
CTestClass() {}
CTestClass(float64_t number, float64_t vec_start, int32_t features_start)
{
m_number=number;
m_vec=SGVector<float64_t>(10);
SGVector<float64_t>::range_fill_vector(m_vec.vector, m_vec.vlen, vec_start);
m_mat=SGMatrix<float64_t>(3,3);
SGVector<float64_t>::range_fill_vector(m_mat.matrix, m_mat.num_cols*m_mat.num_rows,
vec_start);
SGMatrix<int32_t> data=SGMatrix<int32_t>(3, 2);
SGVector<int32_t>::range_fill_vector(data.matrix, data.num_rows*data.num_cols,
features_start);
m_features=new CDenseFeatures<int32_t>(data);
SG_REF(m_features);
m_parameters->add(&m_number, "number", "Test variable");
m_parameters->add(&m_mat, "mat", "Test variable");
m_parameters->add(&m_vec, "vec", "Test variable");
m_parameters->add((CSGObject**)&m_features, "features", "Test variable");
}
virtual ~CTestClass()
{
SG_UNREF(m_features);
}
void print()
{
SG_PRINT("m_number=%f\n", m_number);
SGVector<float64_t>::display_vector(m_vec.vector, m_vec.vlen, "m_vec");
SGVector<float64_t>::display_vector(m_mat.matrix, m_mat.num_cols*m_mat.num_rows,
"m_mat");
SGMatrix<int32_t> features=m_features->get_feature_matrix();
SGMatrix<int32_t>::display_matrix(features.matrix, features.num_rows,
features.num_cols, "m_features");
}
inline virtual const char* get_name() const { return "TestClass"; }
public:
float64_t m_number;
SGVector<float64_t> m_vec;
SGMatrix<float64_t> m_mat;
CDenseFeatures<int32_t>* m_features;
};
void test_test_class_serial()
{
char filename_tmp[] = "serialization_test.XXXXXX";
int fd = mkstemp(filename_tmp);
ASSERT(fd != -1);
int retval = close(fd);
ASSERT(retval != -1);
char* filename = filename_tmp;
CTestClass* to_save=new CTestClass(10, 0, 0);
CTestClass* to_load=new CTestClass(20, 10, 66);
SG_SPRINT("original instance 1:\n");
to_save->print();
SG_SPRINT("original instance 2:\n");
to_load->print();
CSerializableAsciiFile* file;
file=new CSerializableAsciiFile(filename, 'w');
to_save->save_serializable(file);
file->close();
SG_UNREF(file);
file=new CSerializableAsciiFile(filename, 'r');
to_load->load_serializable(file);
file->close();
SG_UNREF(file);
SG_SPRINT("deserialized instance 1 into instance 2: (should be equal to "
"first instance)\n");
to_load->print();
/* assert that variable is equal */
ASSERT(to_load->m_number==to_save->m_number);
/* assert that vector is equal */
for (index_t i=0; i<to_load->m_vec.vlen; ++i)
{
ASSERT(to_load->m_vec[i]==to_save->m_vec[i]);
}
/* assert that matrix is equal */
for (index_t i=0; i<to_load->m_mat.num_cols*to_load->m_mat.num_rows; ++i)
{
ASSERT(to_load->m_mat[i]==to_save->m_mat[i]);
}
/* assert that features object is equal */
SGMatrix<int32_t> features_loaded=to_load->m_features->get_feature_matrix();
SGMatrix<int32_t> features_saved=to_save->m_features->get_feature_matrix();
for (index_t i=0; i<features_loaded.num_rows*features_loaded.num_cols; ++i)
{
ASSERT(features_loaded[i]==features_saved[i]);
}
SG_UNREF(to_save);
SG_UNREF(to_load);
unlink(filename);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
test_test_class_serial();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/io/SerializableJsonFile.h>
#include <shogun/io/SerializableXmlFile.h>
#include <shogun/io/SerializableHdf5File.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
const char* filename="filename.txt";
void print(Parameter* p)
{
TParameter* param=p->get_parameter(0);
SGVector<float64_t>* v=(SGVector<float64_t>*)param->m_parameter;
CMath::display_vector(v->vector, v->vlen, "vector:");
param=p->get_parameter(1);
SGMatrix<float64_t>* m=(SGMatrix<float64_t>*)param->m_parameter;
CMath::display_matrix(m->matrix, m->num_rows, m->num_cols, "matrix:");
}
void check_content_equal(Parameter* save_param, Parameter* load_param)
{
TParameter* p;
p=save_param->get_parameter(0);
SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter;
p=save_param->get_parameter(1);
SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter;
p=load_param->get_parameter(0);
SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter;
p=load_param->get_parameter(1);
SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter;
ASSERT(sv->vlen==lv->vlen);
ASSERT(sm->num_rows==lm->num_rows);
ASSERT(sm->num_cols==lm->num_cols);
for (index_t i=0; i<sv->vlen; ++i)
ASSERT(sv->vector[i]==lv->vector[i]);
for (index_t i=0; i<sm->num_cols*sm->num_rows; ++i)
ASSERT(sm->matrix[i]==lm->matrix[i]);
}
void test_ascii(Parameter* save_param, Parameter* load_param)
{
SG_SPRINT("testing ascii serialization\n");
SG_SPRINT("to save:\n");
print(save_param);
SG_SPRINT("loaded before:\n");
print(load_param);
CSerializableAsciiFile* file;
file=new CSerializableAsciiFile(filename, 'w');
save_param->save(file);
file->close();
SG_UNREF(file);
file=new CSerializableAsciiFile(filename, 'r');
load_param->load(file);
file->close();
SG_UNREF(file);
SG_SPRINT("loaded after:\n");
print(load_param);
check_content_equal(save_param, load_param);
}
void test_hdf5(Parameter* save_param, Parameter* load_param)
{
/* TODO, HDF5 file leaks memory */
SG_SPRINT("testing hdf5 serialization\n");
SG_SPRINT("to save:\n");
print(save_param);
SG_SPRINT("loaded before:\n");
print(load_param);
CSerializableHdf5File* file;
file=new CSerializableHdf5File(filename, 'w');
save_param->save(file);
file->close();
SG_UNREF(file);
file=new CSerializableHdf5File(filename, 'r');
load_param->load(file);
file->close();
SG_UNREF(file);
SG_SPRINT("loaded after:\n");
print(load_param);
check_content_equal(save_param, load_param);
}
void test_json(Parameter* save_param, Parameter* load_param)
{
/* TODO, json file leaks memory, also save methods */
SG_SPRINT("testing json serialization\n");
SG_SPRINT("to save:\n");
print(save_param);
SG_SPRINT("loaded before:\n");
print(load_param);
CSerializableJsonFile* file;
file=new CSerializableJsonFile(filename, 'w');
save_param->save(file);
file->close();
SG_UNREF(file);
file=new CSerializableJsonFile(filename, 'r');
load_param->load(file);
file->close();
SG_UNREF(file);
SG_SPRINT("loaded after:\n");
print(load_param);
check_content_equal(save_param, load_param);
}
void test_xml(Parameter* save_param, Parameter* load_param)
{
/* TODO, xml file leaks memory and produces a read error */
SG_SPRINT("testing xml serialization\n");
SG_SPRINT("to save:\n");
print(save_param);
SG_SPRINT("loaded before:\n");
print(load_param);
CSerializableXmlFile* file;
file=new CSerializableXmlFile(filename, 'w');
save_param->save(file);
file->close();
SG_UNREF(file);
file=new CSerializableXmlFile(filename, 'r');
load_param->load(file);
file->close();
SG_UNREF(file);
SG_SPRINT("loaded after:\n");
print(load_param);
check_content_equal(save_param, load_param);
}
void reset_values(Parameter* save_param, Parameter* load_param)
{
TParameter* p;
p=save_param->get_parameter(0);
SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter;
p=save_param->get_parameter(1);
SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter;
p=load_param->get_parameter(0);
SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter;
p=load_param->get_parameter(1);
SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter;
sv->destroy_vector();
lv->destroy_vector();
sm->destroy_matrix();
lm->destroy_matrix();
*sv=SGVector<float64_t>(9);
*lv=SGVector<float64_t>(3);
*sm=SGMatrix<float64_t>(3, 3);
*lm=SGMatrix<float64_t>(4, 4);
CMath::range_fill_vector(sv->vector, sv->vlen);
CMath::range_fill_vector(sm->matrix, sm->num_rows*sm->num_cols);
CMath::fill_vector(lv->vector, lv->vlen, 0.0);
CMath::fill_vector(lm->matrix, lm->num_rows*lm->num_cols, 0.0);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
/* for serialization */
SGVector<float64_t> sv;
SGMatrix<float64_t> sm;
Parameter* sp=new Parameter();
sp->add(&sv, "vector", "description");
sp->add(&sm, "matrix", "description");
/* for deserialization */
SGVector<float64_t> lv;
SGMatrix<float64_t> lm;
Parameter* lp=new Parameter();
lp->add(&lv, "vector", "description");
lp->add(&lm, "matrix", "description");
/* still leaks memory TODO */
reset_values(sp, lp);
test_json(sp, lp);
reset_values(sp, lp);
test_ascii(sp, lp);
/* still leaks memory TODO */
reset_values(sp, lp);
test_hdf5(sp, lp);
/* still leaks memory TODO */
reset_values(sp, lp);
test_xml(sp, lp);
/* clean up */
sv.destroy_vector();
sm.destroy_matrix();
lv.destroy_vector();
lm.destroy_matrix();
delete sp;
delete lp;
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/SerializableAsciiFile.h>
using namespace shogun;
void test()
{
index_t n=10;
index_t n_class=3;
CMulticlassLabels* labels=new CMulticlassLabels();
SGVector<float64_t> lab(n);
for (index_t i=0; i<n; ++i)
lab[i]=i%n_class;
labels->set_labels(lab);
labels->allocate_confidences_for(n_class);
SGVector<float64_t> conf(n_class);
for (index_t i=0; i<n_class; ++i)
conf[i]=CMath::randn_double();
for (index_t i=0; i<n; ++i)
labels->set_multiclass_confidences(i, conf);
/* create serialized copy */
const char* filename="multiclass_labels.txt";
CSerializableAsciiFile* file=new CSerializableAsciiFile(filename, 'w');
labels->save_serializable(file);
file->close();
SG_UNREF(file);
file=new CSerializableAsciiFile(filename, 'r');
CMulticlassLabels* labels_loaded=new CMulticlassLabels();
labels_loaded->load_serializable(file);
file->close();
SG_UNREF(file);
/* compare */
labels->get_labels().display_vector("labels");
labels_loaded->get_labels().display_vector("labels_loaded");
for (index_t i=0; i<n_class; ++i)
{
labels->get_multiclass_confidences(i).display_vector("confidences");
labels_loaded->get_multiclass_confidences(i).display_vector("confidences_loaded");
}
SG_UNREF(labels_loaded);
SG_UNREF(labels);
}
int main()
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test();
exit_shogun();
return 0;
}
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGString.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/lib/DynamicObjectArray.h>
#include <shogun/structure/FactorGraph.h>
#include <shogun/structure/FactorType.h>
#include <shogun/structure/Factor.h>
#include <shogun/labels/FactorGraphLabels.h>
#include <shogun/structure/MAPInference.h>
using namespace shogun;
inline int grid_to_index(int32_t x, int32_t y, int32_t w = 10)
{
return x + w*y;
}
inline void index_to_grid(int32_t index, int32_t& x, int32_t& y, int32_t w = 10)
{
x = index % w;
y = index / w;
}
void create_tree_graph(int hh, int ww)
{
SGVector<int32_t> card(2);
card[0] = 2;
card[1] = 2;
SGVector<float64_t> w(4);
w[0] = 0.0; // 0,0
w[1] = 0.5; // 1,0
w[2] = 0.5; // 0,1
w[3] = 0.0; // 1,1
int32_t tid = 0;
CTableFactorType* factortype = new CTableFactorType(tid, card, w);
SG_REF(factortype);
SGVector<int32_t> vc(hh*ww);
SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, 2);
CFactorGraph* fg = new CFactorGraph(vc);
SG_REF(fg);
// Add factors
for (int32_t x = 0; x < ww; x++)
{
for (int32_t y = 0; y < hh; y++)
{
if (x > 0)
{
SGVector<float64_t> data;
SGVector<int32_t> var_index(2);
var_index[0] = grid_to_index(x,y,ww);
var_index[1] = grid_to_index(x-1,y,ww);
CFactor* fac1 = new CFactor(factortype, var_index, data);
fg->add_factor(fac1);
}
if (x == 0 && y > 0)
{
SGVector<float64_t> data;
SGVector<int32_t> var_index(2);
var_index[0] = grid_to_index(x,y-1,ww);
var_index[1] = grid_to_index(x,y,ww);
CFactor* fac1 = new CFactor(factortype, var_index, data);
fg->add_factor(fac1);
}
}
}
SG_UNREF(factortype);
fg->connect_components();
SG_SPRINT("is acyclic graph? %d\n", fg->is_acyclic_graph());
SG_SPRINT("is connected graph? %d\n", fg->is_connected_graph());
SG_SPRINT("is tree graph? %d\n", fg->is_tree_graph());
SG_SPRINT("num of edges: %d\n", fg->get_num_edges());
fg->compute_energies();
CMAPInference infer_met(fg, TREE_MAX_PROD);
infer_met.inference();
CFactorGraphObservation* fg_observ = infer_met.get_structured_outputs();
SGVector<int32_t> assignment = fg_observ->get_data();
SG_UNREF(fg_observ);
assignment.display_vector();
SG_UNREF(fg);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
//sg_io->set_loglevel(MSG_DEBUG);
create_tree_graph(30, 30);
exit_shogun();
return 0;
}
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/structure/FWSOSVM.h>
#include <shogun/structure/FactorType.h>
#include <shogun/structure/MAPInference.h>
#include <shogun/structure/FactorGraphModel.h>
#include <shogun/features/FactorGraphFeatures.h>
#include <shogun/labels/FactorGraphLabels.h>
using namespace shogun;
void test(int32_t num_samples)
{
CMath::init_random(17);
// define factor type
SGVector<int32_t> card(2);
card[0] = 2;
card[1] = 2;
SGVector<float64_t> w(8);
w[0] = 0.3; // 0,0
w[1] = 0.5; // 0,0
w[2] = 1.0; // 1,0
w[3] = 0.2; // 1,0
w[4] = 0.05; // 0,1
w[5] = 0.6; // 0,1
w[6] = -0.2; // 1,1
w[7] = 0.75; // 1,1
int32_t tid = 0;
CTableFactorType* factortype = new CTableFactorType(tid, card, w);
SG_REF(factortype);
// create features and labels
CFactorGraphFeatures* instances = new CFactorGraphFeatures(num_samples);
SG_REF(instances);
CFactorGraphLabels* labels = new CFactorGraphLabels(num_samples);
SG_REF(labels);
for (int32_t n = 0; n < num_samples; ++n)
{
// factor graph
SGVector<int32_t> vc(3);
SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, 2);
CFactorGraph* fg = new CFactorGraph(vc);
// add factors
SGVector<float64_t> data1(2);
data1[0] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
data1[1] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
SGVector<int32_t> var_index1(2);
var_index1[0] = 0;
var_index1[1] = 1;
CFactor* fac1 = new CFactor(factortype, var_index1, data1);
fg->add_factor(fac1);
SGVector<float64_t> data2(2);
data2[0] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
data2[1] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
SGVector<int32_t> var_index2(2);
var_index2[0] = 1;
var_index2[1] = 2;
CFactor* fac2 = new CFactor(factortype, var_index2, data2);
fg->add_factor(fac2);
// add factor graph instance
instances->add_sample(fg);
fg->connect_components();
fg->compute_energies();
CMAPInference infer_met(fg, TREE_MAX_PROD);
infer_met.inference();
CFactorGraphObservation* fg_observ = infer_met.get_structured_outputs();
// add ground truth states
labels->add_label(fg_observ);
SG_UNREF(fg_observ);
}
#ifdef SHOW_DATA
// show labels
for (unsigned int n = 0; n < num_samples; ++n)
{
CFactorGraphObservation* fg_observ = CFactorGraphObservation::obtain_from_generic(labels->get_label(n));
SG_SPRINT("- sample %d:\n", n);
SGVector<int32_t> fst = fg_observ->get_data();
SGVector<int32_t>::display_vector(fst.vector, fst.vlen);
SG_UNREF(fg_observ);
}
#endif
SG_SPRINT("----------------------------------------------------\n");
CFactorGraphModel* model = new CFactorGraphModel(instances, labels, TREE_MAX_PROD, false);
SG_REF(model);
// initialize model parameters
SGVector<float64_t> w_truth = w.clone();
w.zero();
factortype->set_w(w);
model->add_factor_type(factortype);
#undef USE_MOSEK
#ifdef USE_MOSEK
// create primal mosek solver
CPrimalMosekSOSVM* primcp = new CPrimalMosekSOSVM(model, labels);
SG_REF(primcp);
primcp->set_regularization(0.01); // TODO: check 1000
#endif
// create BMRM solver
CDualLibQPBMSOSVM* bmrm = new CDualLibQPBMSOSVM(model, labels, 0.01);
bmrm->set_verbose(false);
SG_REF(bmrm);
// create SGD solver
CStochasticSOSVM* sgd = new CStochasticSOSVM(model, labels);
sgd->set_num_iter(100);
sgd->set_lambda(0.01);
SG_REF(sgd);
// create FW solver
CFWSOSVM* fw = new CFWSOSVM(model, labels);
fw->set_num_iter(100);
fw->set_lambda(0.01);
fw->set_gap_threshold(0.01);
SG_REF(fw);
// timer
CTime start;
float64_t t1 = start.cur_time_diff(false);
#ifdef USE_MOSEK
// train PrimalMosek
primcp->train();
float64_t t1 = start.cur_time_diff(false);
#endif
// train BMRM
bmrm->train();
float64_t t2 = start.cur_time_diff(false);
// train SGD
sgd->train();
float64_t t3 = start.cur_time_diff(false);
// train FW
fw->train();
float64_t t4 = start.cur_time_diff(false);
SG_SPRINT(">>>> PrimalMosekSOSVM trained in %9.4f\n", t1);
SG_SPRINT(">>>> BMRM trained in %9.4f\n", t2-t1);
SG_SPRINT(">>>> SGD trained in %9.4f\n", t3-t2);
SG_SPRINT(">>>> FW trained in %9.4f\n", t4-t3);
// check w
#ifdef USE_MOSEK
primcp->get_slacks().display_vector("slacks");
primcp->get_w().display_vector("w_mosek");
#endif
bmrm->get_w().display_vector("w_bmrm");
sgd->get_w().display_vector("w_sgd");
fw->get_w().display_vector("w_fw");
w_truth.display_vector("w_truth");
#ifdef USE_MOSEK
// Evaluation PrimalMosek
CStructuredLabels* labels_primcp = CLabelsFactory::to_structured(primcp->apply());
SG_REF(labels_primcp);
float64_t acc_loss_primcp = 0.0;
float64_t ave_loss_primcp = 0.0;
for (int32_t i=0; i<num_samples; ++i)
{
CStructuredData* y_pred = labels_primcp->get_label(i);
CStructuredData* y_truth = labels->get_label(i);
acc_loss_primcp += model->delta_loss(y_truth, y_pred);
SG_UNREF(y_pred);
SG_UNREF(y_truth);
}
ave_loss_primcp = acc_loss_primcp / static_cast<float64_t>(num_samples);
SG_SPRINT("primal mosek solver: average training loss = %f\n", ave_loss_primcp);
#endif
// Evaluation BMRM
CStructuredLabels* labels_bmrm = CLabelsFactory::to_structured(bmrm->apply());
SG_REF(labels_bmrm);
float64_t acc_loss_bmrm = 0.0;
float64_t ave_loss_bmrm = 0.0;
for (int32_t i=0; i<num_samples; ++i)
{
CStructuredData* y_pred = labels_bmrm->get_label(i);
CStructuredData* y_truth = labels->get_label(i);
acc_loss_bmrm += model->delta_loss(y_truth, y_pred);
SG_UNREF(y_pred);
SG_UNREF(y_truth);
}
ave_loss_bmrm = acc_loss_bmrm / static_cast<float64_t>(num_samples);
SG_SPRINT("bmrm solver: average training loss = %f\n", ave_loss_bmrm);
// Evaluation SGD
CStructuredLabels* labels_sgd = CLabelsFactory::to_structured(sgd->apply());
SG_REF(labels_sgd);
float64_t acc_loss_sgd = 0.0;
float64_t ave_loss_sgd = 0.0;
for (int32_t i=0; i<num_samples; ++i)
{
CStructuredData* y_pred = labels_sgd->get_label(i);
CStructuredData* y_truth = labels->get_label(i);
acc_loss_sgd += model->delta_loss(y_truth, y_pred);
SG_UNREF(y_pred);
SG_UNREF(y_truth);
}
ave_loss_sgd = acc_loss_sgd / static_cast<float64_t>(num_samples);
SG_SPRINT("sgd solver: average training loss = %f\n", ave_loss_sgd);
// Evaluation FW
CStructuredLabels* labels_fw = CLabelsFactory::to_structured(fw->apply());
SG_REF(labels_fw);
float64_t acc_loss_fw = 0.0;
float64_t ave_loss_fw = 0.0;
for (int32_t i=0; i<num_samples; ++i)
{
CStructuredData* y_pred = labels_fw->get_label(i);
CStructuredData* y_truth = labels->get_label(i);
acc_loss_fw += model->delta_loss(y_truth, y_pred);
SG_UNREF(y_pred);
SG_UNREF(y_truth);
}
ave_loss_fw = acc_loss_fw / static_cast<float64_t>(num_samples);
SG_SPRINT("fw solver: average training loss = %f\n", ave_loss_fw);
#ifdef USE_MOSEK
SG_UNREF(labels_primcp);
SG_UNREF(primcp);
#endif
SG_UNREF(labels_fw);
SG_UNREF(labels_sgd);
SG_UNREF(labels_bmrm);
SG_UNREF(fw);
SG_UNREF(sgd);
SG_UNREF(bmrm);
SG_UNREF(model);
SG_UNREF(labels);
SG_UNREF(instances);
SG_UNREF(factortype);
}
int main(int argc, char * argv[])
{
init_shogun_with_defaults();
//sg_io->set_loglevel(MSG_DEBUG);
test(100);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2014 Jiaolong Xu
* Copyright (C) 2014 Jiaolong Xu
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/io/LibSVMFile.h>
#include <shogun/lib/common.h>
#include <shogun/lib/Time.h>
#include <shogun/lib/DelimiterTokenizer.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/base/DynArray.h>
#include <shogun/base/init.h>
#include <shogun/mathematics/Math.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/structure/FactorType.h>
#include <shogun/structure/MAPInference.h>
#include <shogun/structure/FactorGraphModel.h>
#include <shogun/features/FactorGraphFeatures.h>
#include <shogun/labels/FactorGraphLabels.h>
#include <shogun/structure/SOSVMHelper.h>
using namespace shogun;
#define NUM_STATUS 2 // each class has binary labels
const char FNAME_TRAIN[] = "../../../../data/multilabel/scene_train";
const char FNAME_TEST[] = "../../../../data/multilabel/scene_test";
enum EGraphStructure
{
TREE = 0, // tree-structure graph
FULL = 1 // full-connected graph
};
struct MultilabelParameter
{
EGraphStructure graph_type;
EMAPInferType infer_type;
int32_t sgd_num_iter;
float64_t sgd_lambda;
MultilabelParameter() : graph_type(FULL), infer_type(GRAPH_CUT),
sgd_num_iter(200), sgd_lambda(0.0001)
{}
MultilabelParameter(EGraphStructure graph, EMAPInferType infer,
int32_t num_iter = 200, float64_t lambda = 0.0001)
: graph_type(graph), infer_type(infer), sgd_num_iter(num_iter), sgd_lambda(lambda)
{}
~MultilabelParameter() {}
};
void read_data(const char * fname, SGMatrix<int32_t>& labels, SGMatrix<float64_t>& feats)
{
// sparse data from matrix
CLibSVMFile * svmfile = new CLibSVMFile(fname);
SGSparseVector<float64_t>* spv_feats;
SGVector<float64_t>* pv_labels;
int32_t dim_feat;
int32_t num_samples;
int32_t num_classes;
svmfile->get_sparse_matrix(spv_feats, dim_feat, num_samples, pv_labels, num_classes);
SG_SPRINT("Number of the samples: %d\n", num_samples);
SG_SPRINT("Dimention of the feature: %d\n", dim_feat+1);
SG_SPRINT("Number of classes: %d\n", num_classes);
feats = SGMatrix<float64_t>(dim_feat+1, num_samples);
labels = SGMatrix<int32_t>(num_classes, num_samples);
feats.zero();
labels.zero();
for (int32_t i = 0; i < num_samples; i++)
{
SGVector<float64_t> v_feat = spv_feats[i].get_dense();
SGVector<float64_t> v_labels = pv_labels[i];
for (int32_t f = 0; f < v_feat.size(); f++)
feats(f, i) = v_feat[f];
feats(dim_feat, i) = 1.0; // bias
for (int32_t l = 0; l < v_labels.size(); l++)
labels((int32_t)v_labels[l], i) = 1;
}
SG_UNREF(svmfile);
SG_FREE(spv_feats);
SG_FREE(pv_labels);
}
/** get tree-structured graph */
SGMatrix< int32_t > get_edges_tree()
{
SGMatrix< int32_t > label_tree_index;
// A tree structure is defined by a 2-d matrix where
// each row stores the indecies of a pair of connect factors
// Define label tree structure
label_tree_index = SGMatrix< int32_t > (5, 2);
label_tree_index[0] = 0;
label_tree_index[1] = 0;
label_tree_index[2] = 1;
label_tree_index[3] = 4;
label_tree_index[4] = 2;
label_tree_index[5] = 2;
label_tree_index[6] = 3;
label_tree_index[7] = 4;
label_tree_index[8] = 5;
label_tree_index[9] = 5;
return label_tree_index;
}
/** get full-connected graph */
SGMatrix< int32_t > get_edges_full(const int32_t num_classes)
{
// A full-connected graph is defined by a 2-d matrix where
// each row stores the indecies of a pair of connected nodes
int32_t num_rows = num_classes*(num_classes - 1)/2;
ASSERT(num_rows > 0);
SGMatrix< int32_t > mat(num_rows, 2);
int32_t k = 0;
for (int32_t i = 0; i < num_classes - 1; i++)
{
for (int32_t j = i + 1; j < num_classes; j++)
{
mat[num_rows + k] = j;
mat[k++] = i;
}
}
return mat;
}
/** Get graph structure
*
* @param graph_type tree structure or full-connected graph
* @param num_classes number of classes
*
* @return a matrix contains the indeces of the pairwise edges*/
SGMatrix<int32_t> get_edge_list(EGraphStructure graph_type, int32_t num_classes)
{
SGMatrix<int32_t> mat;
switch (graph_type)
{
case TREE:
mat = get_edges_tree();
break;
case FULL:
mat = get_edges_full(num_classes);
break;
default:
mat = get_edges_tree();
break;
}
return mat;
}
void build_factor_graph(MultilabelParameter param, SGMatrix<float64_t> feats, SGMatrix<int32_t> labels,
CFactorGraphFeatures * fg_feats, CFactorGraphLabels * fg_labels,
const DynArray<CTableFactorType *>& v_ftp_u,
const DynArray<CTableFactorType *>& v_ftp_t)
{
int32_t num_sample = labels.num_cols;
int32_t num_classes = labels.num_rows;
int32_t dim = feats.num_rows;
SGMatrix< int32_t > mat_edges = get_edge_list(param.graph_type, num_classes);
int32_t num_edges = mat_edges.num_rows;
// prepare features and labels in factor graph
for (int32_t n = 0; n < num_sample; n++)
{
SGVector<int32_t> vc(num_classes);
SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, NUM_STATUS);
CFactorGraph * fg = new CFactorGraph(vc);
float64_t * pfeat = feats.get_column_vector(n);
SGVector<float64_t> feat_i(dim);
memcpy(feat_i.vector, pfeat, dim * sizeof(float64_t));
// add unary factors
for (int32_t u = 0; u < num_classes; u++)
{
SGVector<int32_t> var_index_u(1);
var_index_u[0] = u;
CFactor * fac_u = new CFactor(v_ftp_u[u], var_index_u, feat_i);
fg->add_factor(fac_u);
}
// add pairwise factors
for (int32_t t = 0; t < num_edges; t++)
{
SGVector<float64_t> data_t(1);
data_t[0] = 1.0;
SGVector<int32_t> var_index_t = mat_edges.get_row_vector(t);
CFactor * fac_t = new CFactor(v_ftp_t[t], var_index_t, data_t);
fg->add_factor(fac_t);
}
// add factor graph instance
fg_feats->add_sample(fg);
// add label
int32_t * plabs = labels.get_column_vector(n);
SGVector<int32_t> states_gt(num_classes);
memcpy(states_gt.vector, plabs, num_classes * sizeof(int32_t));
SGVector<float64_t> loss_weights(num_classes);
SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0/num_classes);
CFactorGraphObservation * fg_obs = new CFactorGraphObservation(states_gt, loss_weights);
fg_labels->add_label(fg_obs);
}
}
void evaluate(CFactorGraphModel * model, int32_t num_samples, CStructuredLabels * labels_sgd, \
CFactorGraphLabels * fg_labels, float64_t & ave_error)
{
float64_t acc_loss_sgd = 0.0;
for (int32_t i = 0; i < num_samples; ++i)
{
CStructuredData * y_pred = labels_sgd->get_label(i);
CStructuredData * y_truth = fg_labels->get_label(i);
acc_loss_sgd += model->delta_loss(y_truth, y_pred);
SG_UNREF(y_pred);
SG_UNREF(y_truth);
}
ave_error = acc_loss_sgd / static_cast<float64_t>(num_samples);
}
void test(MultilabelParameter param, SGMatrix<int32_t> labels_train, SGMatrix<float64_t> feats_train,
SGMatrix<int32_t> labels_test, SGMatrix<float64_t> feats_test)
{
int32_t num_sample_train = labels_train.num_cols;
int32_t num_classes = labels_train.num_rows;
int32_t dim = feats_train.num_rows;
// Build factor graph
SGMatrix< int32_t > mat_edges = get_edge_list(param.graph_type, num_classes);
int32_t num_edges = mat_edges.num_rows;
int32_t tid;
// we have l = num_classes different weights: w_1, w_2, ..., w_l
// so we create num_classes different unary factor types
DynArray<CTableFactorType *> v_ftp_u;
for (int32_t u = 0; u < num_classes; u++)
{
tid = u;
SGVector<int32_t> card_u(1);
card_u[0] = NUM_STATUS;
SGVector<float64_t> w_u(dim * NUM_STATUS);
w_u.zero();
v_ftp_u.append_element(new CTableFactorType(tid, card_u, w_u));
}
// define factor type: tree edge factor
// note that each edge is a new type
DynArray<CTableFactorType *> v_ftp_t;
for (int32_t t = 0; t < num_edges; t++)
{
tid = t + num_classes;
SGVector<int32_t> card_t(2);
card_t[0] = NUM_STATUS;
card_t[1] = NUM_STATUS;
SGVector<float64_t> w_t(NUM_STATUS * NUM_STATUS);
w_t.zero();
v_ftp_t.append_element(new CTableFactorType(tid, card_t, w_t));
}
// prepare features and labels in factor graph
CFactorGraphFeatures * fg_feats_train = new CFactorGraphFeatures(num_sample_train);
SG_REF(fg_feats_train);
CFactorGraphLabels * fg_labels_train = new CFactorGraphLabels(num_sample_train);
SG_REF(fg_labels_train);
build_factor_graph(param, feats_train, labels_train, fg_feats_train, fg_labels_train, v_ftp_u, v_ftp_t);
SG_SPRINT("----------------------------------------------------\n");
CFactorGraphModel * model = new CFactorGraphModel(fg_feats_train, fg_labels_train, param.infer_type, false);
SG_REF(model);
// initialize model parameters
for (int32_t u = 0; u < num_classes; u++)
model->add_factor_type(v_ftp_u[u]);
for (int32_t t = 0; t < num_edges; t++)
model->add_factor_type(v_ftp_t[t]);
// create SGD solver
CStochasticSOSVM * sgd = new CStochasticSOSVM(model, fg_labels_train, true);
sgd->set_num_iter(param.sgd_num_iter);
sgd->set_lambda(param.sgd_lambda);
SG_REF(sgd);
// timer
CTime start;
// train SGD
sgd->train();
float64_t t2 = start.cur_time_diff(false);
SG_SPRINT("SGD trained in %9.4f\n", t2);
// Evaluation SGD
CStructuredLabels * labels_sgd = CLabelsFactory::to_structured(sgd->apply());
SG_REF(labels_sgd);
float64_t ave_loss_sgd = 0.0;
evaluate(model, num_sample_train, labels_sgd, fg_labels_train, ave_loss_sgd);
SG_SPRINT("sgd solver: average training loss = %f\n", ave_loss_sgd);
SG_UNREF(labels_sgd);
if(labels_test.num_cols > 0)
{
// prepare features and labels in factor graph
int32_t num_sample_test = labels_test.num_cols;
CFactorGraphFeatures * fg_feats_test = new CFactorGraphFeatures(num_sample_test);
SG_REF(fg_feats_test);
CFactorGraphLabels * fg_labels_test = new CFactorGraphLabels(num_sample_test);
SG_REF(fg_labels_test);
build_factor_graph(param, feats_test, labels_test, fg_feats_test, fg_labels_test, v_ftp_u, v_ftp_t);
sgd->set_features(fg_feats_test);
sgd->set_labels(fg_labels_test);
labels_sgd = CLabelsFactory::to_structured(sgd->apply());
evaluate(model, num_sample_test, labels_sgd, fg_labels_test, ave_loss_sgd);
SG_REF(labels_sgd);
SG_SPRINT("sgd solver: average testing error = %f\n", ave_loss_sgd);
SG_UNREF(fg_feats_test);
SG_UNREF(fg_labels_test);
}
SG_UNREF(labels_sgd);
SG_UNREF(sgd);
SG_UNREF(model);
SG_UNREF(fg_feats_train);
SG_UNREF(fg_labels_train);
}
int main(int argc, char * argv[])
{
init_shogun_with_defaults();
// Training data
SGMatrix<int32_t> labels_train;
SGMatrix<float64_t> feats_train;
// Testing data
SGMatrix<int32_t> labels_test;
SGMatrix<float64_t> feats_test;
// Train and test with real data
FILE * pfile = fopen(FNAME_TRAIN, "r");
if (pfile == NULL)
{
SG_SPRINT("Unable to open file: %s\n", FNAME_TRAIN);
return 0;
}
fclose(pfile);
pfile = fopen(FNAME_TEST, "r");
if (pfile == NULL)
{
SG_SPRINT("Unable to open file: %s\n", FNAME_TEST);
return 0;
}
fclose(pfile);
SG_SPRINT("Experiment with real dataset: \n");
read_data(FNAME_TRAIN, labels_train, feats_train);
read_data(FNAME_TEST, labels_test, feats_test);
MultilabelParameter param;
SG_SPRINT("\nExample 1: tree structure, max-product inference\n");
param = MultilabelParameter(TREE, TREE_MAX_PROD);
test(param, labels_train, feats_train, labels_test, feats_test);
SG_SPRINT("\nExample 2.1: tree structure, graph-cuts inference\n");
param = MultilabelParameter(TREE, GRAPH_CUT);
test(param, labels_train, feats_train, labels_test, feats_test);
SG_SPRINT("\nExample 2.2: full-connected graph, graph-cuts inference\n");
param = MultilabelParameter(FULL, GRAPH_CUT);
test(param, labels_train, feats_train, labels_test, feats_test);
SG_SPRINT("\nExample 3.1: tree structure, GEMPLP inference\n");
param = MultilabelParameter(TREE, GEMPLP);
test(param, labels_train, feats_train, labels_test, feats_test);
SG_SPRINT("\nExample 3.2: full-connected graph, GEMPLP inference\n");
param = MultilabelParameter(FULL, GEMPLP);
test(param, labels_train, feats_train, labels_test, feats_test);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char * argv[])
{
return 0;
}
#endif //USE_GPL_SHOGUN
#include <shogun/features/MatrixFeatures.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/structure/SequenceLabels.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
using namespace shogun;
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
#ifdef USE_MOSEK
// Create structured labels
CSequenceLabels* labels = new CSequenceLabels(5, 2);
// Label sequences of with two states
int32_t lab1[] = {0, 0, 1, 1};
int32_t lab2[] = {1, 1, 1, 0};
int32_t lab3[] = {0, 1, 0, 1};
int32_t lab4[] = {1, 0, 0, 0};
int32_t lab5[] = {0, 1, 1, 0};
// No need for ref_counting in SGVector since the data is allocated
// during compilation time
labels->add_vector_label(SGVector< int32_t >(lab1, 4, false));
labels->add_vector_label(SGVector< int32_t >(lab2, 4, false));
labels->add_vector_label(SGVector< int32_t >(lab3, 4, false));
labels->add_vector_label(SGVector< int32_t >(lab4, 4, false));
labels->add_vector_label(SGVector< int32_t >(lab5, 4, false));
// Create features
CMatrixFeatures< float64_t >* features = new CMatrixFeatures< float64_t >(5, 3);
// Observation matrices with three states
float64_t mat1[] = { 0., 1., 2., 1., 1., 1., 2., 2., 2., 1., 0., 1. };
float64_t mat2[] = { 1., 2., 2., 0., 2., 1., 1., 1., 0., 0., 2., 1. };
float64_t mat3[] = { 0., 1., 2., 1., 1., 2., 1., 1., 0., 0., 1., 0. };
float64_t mat4[] = { 1., 2., 1., 0., 2., 1., 0., 2., 0., 1., 0., 2. };
float64_t mat5[] = { 2., 2., 0., 1., 2., 1., 0., 1., 2., 0., 2., 0. };
features->set_feature_vector(SGMatrix< float64_t >(mat1, 3, 4, false), 0);
features->set_feature_vector(SGMatrix< float64_t >(mat2, 3, 4, false), 1);
features->set_feature_vector(SGMatrix< float64_t >(mat3, 3, 4, false), 2);
features->set_feature_vector(SGMatrix< float64_t >(mat4, 3, 4, false), 3);
features->set_feature_vector(SGMatrix< float64_t >(mat5, 3, 4, false), 4);
CHMSVMModel* model = new CHMSVMModel(features, labels, SMT_TWO_STATE, 3);
SG_REF(model);
CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels);
SG_REF(sosvm);
sosvm->train();
sosvm->get_w().display_vector("w");
sosvm->get_slacks().display_vector("slacks");
// Free memory
SG_UNREF(sosvm);
SG_UNREF(model);
#endif /* USE_MOSEK */
exit_shogun();
return 0;
}
#include <shogun/base/init.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/evaluation/StructuredAccuracy.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/StructuredLabels.h>
#include <shogun/lib/common.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/MulticlassModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/lib/Time.h>
#include <shogun/base/init.h>
#include <stdio.h>
using namespace shogun;
#define DIMS 2
#define EPSILON 10e-5
#define NUM_SAMPLES 100
#define NUM_CLASSES 10
char FNAME[] = "data.out";
void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
float64_t means[DIMS];
float64_t stds[DIMS];
FILE* pfile = fopen(FNAME, "w");
for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c )
{
for ( int32_t j = 0 ; j < DIMS ; ++j )
{
means[j] = CMath::random(-100, 100);
stds[j] = CMath::random( 1, 5);
}
for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i )
{
labs[c*NUM_SAMPLES+i] = c;
fprintf(pfile, "%d", c);
for ( int32_t j = 0 ; j < DIMS ; ++j )
{
feats[(c*NUM_SAMPLES+i)*DIMS + j] =
CMath::normal_random(means[j], stds[j]);
fprintf(pfile, " %f", feats[(c*NUM_SAMPLES+i)*DIMS + j]);
}
fprintf(pfile, "\n");
}
}
fclose(pfile);
}
void read_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
FILE* pfile = fopen(FNAME, "r");
if (pfile == NULL)
SG_SERROR("Unable to open file: %s\n", FNAME);
int32_t label, idx;
float32_t value;
for ( int32_t i = 0 ; i < NUM_SAMPLES*NUM_CLASSES ; ++i )
{
fscanf(pfile, "%d", &label);
labs[i] = label;
for ( int32_t j = 0 ; j < DIMS ; ++j )
{
fscanf(pfile, "%d:%f", &idx, &value);
feats[i*DIMS + j] = value;
}
}
fclose(pfile);
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
SGVector< float64_t > labs(NUM_CLASSES*NUM_SAMPLES);
SGMatrix< float64_t > feats(DIMS, NUM_CLASSES*NUM_SAMPLES);
gen_rand_data(labs, feats);
//read_data(labs, feats);
// Create train labels
CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs);
CMulticlassLabels* mlabels = new CMulticlassLabels(labs);
// Create train features
CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feats);
// Create structured model
CMulticlassModel* model = new CMulticlassModel(features, labels);
// Create SO-SVM
CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels);
CDualLibQPBMSOSVM* bundle = new CDualLibQPBMSOSVM(model, labels, 100);
CStochasticSOSVM* sgd = new CStochasticSOSVM(model, labels);
bundle->set_verbose(false);
SG_REF(sosvm);
SG_REF(bundle);
SG_REF(sgd);
CTime start;
sosvm->train();
float64_t t1 = start.cur_time_diff(false);
bundle->train();
float64_t t2 = start.cur_time_diff(false);
sgd->train();
float64_t t3 = start.cur_time_diff(false);
SG_SPRINT(">>>> PrimalMosekSOSVM trained in %9.4f\n", t1);
SG_SPRINT(">>>> BMRM trained in %9.4f\n", t2-t1);
SG_SPRINT(">>>> SGD trained in %9.4f\n", t3-t2);
CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply());
CStructuredLabels* bout = CLabelsFactory::to_structured(bundle->apply());
CStructuredLabels* sout = CLabelsFactory::to_structured(sgd->apply());
// Create liblinear svm classifier with L2-regularized L2-loss
CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
// Add some configuration to the svm
svm->set_epsilon(EPSILON);
svm->set_bias_enabled(false);
// Create a multiclass svm classifier that consists of several of the previous one
CLinearMulticlassMachine* mc_svm =
new CLinearMulticlassMachine( new CMulticlassOneVsRestStrategy(),
(CDotFeatures*) features, svm, mlabels);
SG_REF(mc_svm);
// Train the multiclass machine using the data passed in the constructor
mc_svm->train();
CMulticlassLabels* mout = CLabelsFactory::to_multiclass(mc_svm->apply());
SGVector< float64_t > w = sosvm->get_w();
for ( int32_t i = 0 ; i < w.vlen ; ++i )
SG_SPRINT("%10f ", w[i]);
SG_SPRINT("\n\n");
for ( int32_t i = 0 ; i < NUM_CLASSES ; ++i )
{
CLinearMachine* lm = (CLinearMachine*) mc_svm->get_machine(i);
SGVector< float64_t > mw = lm->get_w();
for ( int32_t j = 0 ; j < mw.vlen ; ++j )
SG_SPRINT("%10f ", mw[j]);
SG_UNREF(lm); // because of CLinearMulticlassMachine::get_machine()
}
SG_SPRINT("\n");
CStructuredAccuracy* structured_evaluator = new CStructuredAccuracy();
CMulticlassAccuracy* multiclass_evaluator = new CMulticlassAccuracy();
SG_REF(structured_evaluator);
SG_REF(multiclass_evaluator);
SG_SPRINT("SO-SVM: %5.2f%\n", 100.0*structured_evaluator->evaluate(out, labels));
SG_SPRINT("BMRM: %5.2f%\n", 100.0*structured_evaluator->evaluate(bout, labels));
SG_SPRINT("SGD: %5.2f%\n", 100.0*structured_evaluator->evaluate(sout, labels));
SG_SPRINT("MC: %5.2f%\n", 100.0*multiclass_evaluator->evaluate(mout, mlabels));
// Free memory
SG_UNREF(multiclass_evaluator);
SG_UNREF(structured_evaluator);
SG_UNREF(mout);
SG_UNREF(mc_svm);
SG_UNREF(sgd);
SG_UNREF(bundle);
SG_UNREF(sosvm);
SG_UNREF(sout);
SG_UNREF(bout);
SG_UNREF(out);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Michal Uricar
* Copyright (C) 2012 Michal Uricar
*/
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/base/init.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/StructuredLabels.h>
#include <shogun/lib/common.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/BmrmStatistics.h>
#include <shogun/structure/MulticlassModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
using namespace shogun;
#define DIMS 2
#define EPSILON 10e-5
#define NUM_SAMPLES 30
#define NUM_CLASSES 3
char FNAME[] = "data.svmlight";
/** Reads multiclass trainig data stored in svmlight format (i.e. label nz_idx_1:value1 nz_idx_2:value2 ... nz_idx_N:valueN )
*
* @param fname path to file with training data
* @param DIM dimension of features
* @param N number of feature vectors
* @param labs vector with labels
* @param feats matrix with features
*/
void read_data(const char fname[], uint32_t DIM, uint32_t N, SGVector<float64_t> labs, SGMatrix<float64_t> feats)
{
CStreamingAsciiFile* file=new CStreamingAsciiFile(fname);
SG_REF(file);
CStreamingSparseFeatures< float64_t >* stream_features=
new CStreamingSparseFeatures< float64_t >(file, true, 1024);
SG_REF(stream_features);
SGVector<float64_t > vec(DIM);
stream_features->start_parser();
uint32_t num_vectors=0;
while (stream_features->get_next_example())
{
vec.zero();
stream_features->add_to_dense_vec(1.0, vec, DIM);
labs[num_vectors]=stream_features->get_label();
for (uint32_t i=0; i<DIM; ++i)
feats[num_vectors*DIM+i]=vec[i];
num_vectors++;
stream_features->release_example();
}
stream_features->end_parser();
SG_UNREF(stream_features);
}
/** Generates random multiclass training data and stores them in svmlight format
*
* @param labs returned vector with labels
* @param feats returned matrix with features
*/
void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
float64_t means[DIMS];
float64_t stds[DIMS];
FILE* pfile = fopen(FNAME, "w");
CMath::init_random(17);
for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c )
{
for ( int32_t j = 0 ; j < DIMS ; ++j )
{
means[j] = CMath::random(-100, 100);
stds[j] = CMath::random( 1, 5);
}
for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i )
{
labs[c*NUM_SAMPLES+i] = c;
fprintf(pfile, "%d", c);
for ( int32_t j = 0 ; j < DIMS ; ++j )
{
feats[(c*NUM_SAMPLES+i)*DIMS + j] =
CMath::normal_random(means[j], stds[j]);
fprintf(pfile, " %d:%f", j+1, feats[(c*NUM_SAMPLES+i)*DIMS + j]);
}
fprintf(pfile, "\n");
}
}
fclose(pfile);
}
int main(int argc, char * argv[])
{
// initialization
//-------------------------------------------------------------------------
float64_t lambda=0.01, eps=0.01;
bool icp=1;
uint32_t cp_models=1;
ESolver solver=BMRM;
uint32_t feat_dim, num_feat;
init_shogun_with_defaults();
if (argc > 1 && argc < 8)
{
SG_SERROR("Usage: so_multiclass_BMRM <data.in> <feat_dim> <num_feat> <lambda> <icp> <epsilon> <solver> [<cp_models>]\n");
return -1;
}
if (argc > 1)
{
// parse command line arguments for parameters setting
SG_SPRINT("arg[1] = %s\n", argv[1]);
feat_dim=::atoi(argv[2]);
num_feat=::atoi(argv[3]);
lambda=::atof(argv[4]);
icp=::atoi(argv[5]);
eps=::atof(argv[6]);
if (strcmp("BMRM", argv[7])==0)
solver=BMRM;
if (strcmp("PPBMRM", argv[7])==0)
solver=PPBMRM;
if (strcmp("P3BMRM", argv[7])==0)
solver=P3BMRM;
if (argc > 8)
{
cp_models=::atoi(argv[8]);
}
}
else
{
// default parameters
feat_dim=DIMS;
num_feat=NUM_SAMPLES*NUM_CLASSES;
lambda=1e3;
icp=1;
eps=0.01;
solver=BMRM;
}
SGVector<float64_t> labs(num_feat);
SGMatrix<float64_t> feats(feat_dim, num_feat);
if (argc==1)
{
gen_rand_data(labs, feats);
}
else
{
// read data
read_data(argv[1], feat_dim, num_feat, labs, feats);
}
// Create train labels
CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs);
// Create train features
CDenseFeatures< float64_t >* features =
new CDenseFeatures< float64_t >(feats);
// Create structured model
CMulticlassModel* model = new CMulticlassModel(features, labels);
// Create SO-SVM
CDualLibQPBMSOSVM* sosvm =
new CDualLibQPBMSOSVM(
model,
labels,
lambda);
SG_REF(sosvm);
sosvm->set_cleanAfter(10);
sosvm->set_cleanICP(icp);
sosvm->set_TolRel(eps);
sosvm->set_cp_models(cp_models);
sosvm->set_solver(solver);
// Train
//-------------------------------------------------------------------------
SG_SPRINT("Train using lambda = %lf ICP removal = %d \n",
sosvm->get_lambda(), sosvm->get_cleanICP());
sosvm->train();
BmrmStatistics res = sosvm->get_result();
SG_SPRINT("result = { Fp=%lf, Fd=%lf, nIter=%d, nCP=%d, nzA=%d, exitflag=%d }\n",
res.Fp, res.Fd, res.nIter, res.nCP, res.nzA, res.exitflag);
CStructuredLabels* out =
CLabelsFactory::to_structured(sosvm->apply());
SG_REF(out);
SG_SPRINT("\n");
// Compute error
//-------------------------------------------------------------------------
float64_t error=0.0;
for (uint32_t i=0; i<num_feat; ++i)
{
CRealNumber* rn = CRealNumber::obtain_from_generic( out->get_label(i) );
error+=(rn->value==labs.get_element(i)) ? 0.0 : 1.0;
SG_UNREF(rn); // because of out->get_label(i) above
}
SG_SPRINT("Error = %lf %% \n", error/num_feat*100);
// Free memory
SG_UNREF(sosvm);
SG_UNREF(out);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char * argv[])
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* This software is distributed under BSD 3-clause license (see LICENSE file).
*
* Copyright(C) 2014 Abinash Panda
* Written(W) 2014 Abinash Panda
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/StructuredAccuracy.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/io/LibSVMFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/structure/MultilabelModel.h>
#include <shogun/structure/MultilabelSOLabels.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
using namespace shogun;
void load_data(const char * file_name,
SGMatrix<float64_t> &feats_matrix,
int32_t &dim_feat,
int32_t &num_samples,
SGVector<int32_t> * &multilabels,
int32_t &num_classes)
{
CLibSVMFile * file = new CLibSVMFile(file_name);
ASSERT(file != NULL);
SG_REF(file);
SGSparseVector<float64_t> * feats;
SGVector<float64_t> * labels;
file->get_sparse_matrix(
feats,
dim_feat,
num_samples,
labels,
num_classes);
feats_matrix = SGMatrix<float64_t>(dim_feat, num_samples);
/** preparation of data for multilabel model */
for (index_t i = 0; i < num_samples; i++)
{
SGSparseVector<float64_t> feat_sample = feats[i];
for (index_t j = 0; j < dim_feat; j++)
feats_matrix[i * dim_feat + j] = feat_sample.get_feature(j);
}
multilabels = SG_MALLOC(SGVector<int32_t>, num_samples);
for (index_t i = 0; i < num_samples; i++)
{
SGVector<float64_t> label_sample = labels[i];
SGVector<int32_t> multilabel_sample(label_sample.vlen);
for (index_t j = 0; j < label_sample.vlen; j++)
multilabel_sample[j] = label_sample[j];
CMath::qsort(multilabel_sample);
multilabels[i] = multilabel_sample;
}
SG_UNREF(file);
SG_FREE(feats);
SG_FREE(labels);
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
const char train_file_name[] = "../../../../data/multilabel/yeast_train.svm";
const char test_file_name[] = "../../../../data/multilabel/yeast_test.svm";
SGMatrix<float64_t> feats_matrix;
SGVector<int32_t> * multilabels;
int32_t dim_feat;
int32_t num_samples;
int32_t num_classes;
load_data(
train_file_name,
feats_matrix,
dim_feat,
num_samples,
multilabels,
num_classes);
SG_SPRINT("Number of samples = %d\n", num_samples);
SG_SPRINT("Dimension of feature = %d\n", dim_feat);
SG_SPRINT("Number of classes = %d\n", num_classes);
SG_SPRINT("-------------------------------------------\n");
CMultilabelSOLabels * mlabels = new CMultilabelSOLabels(num_samples,
num_classes);
SG_REF(mlabels);
mlabels->set_sparse_labels(multilabels);
CSparseFeatures<float64_t> * features = new CSparseFeatures<float64_t>(
feats_matrix);
SG_REF(features);
CMultilabelModel * model = new CMultilabelModel(features, mlabels);
SG_REF(model);
CStochasticSOSVM * sgd = new CStochasticSOSVM(model, mlabels);
SG_REF(sgd);
CDualLibQPBMSOSVM * bundle = new CDualLibQPBMSOSVM(model, mlabels, 100);
bundle->set_verbose(false);
SG_REF(bundle);
CPrimalMosekSOSVM * sosvm = new CPrimalMosekSOSVM(model, mlabels);
SG_REF(sosvm);
CTime * start = new CTime();
SG_REF(start);
sgd->train();
float64_t t1 = start->cur_time_diff(false);
bundle->train();
float64_t t2 = start->cur_time_diff(false);
sosvm->train();
float64_t t3 = start->cur_time_diff(false);
SG_SPRINT(">>> Time taken for training using %s = %f\n", sgd->get_name(),
t1);
SG_SPRINT(">>> Time taken for training using %s = %f\n", bundle->get_name(),
t2 - t1);
SG_SPRINT(">>> Time taken for learning using %s = %f\n", sosvm->get_name(),
t3 - t2);
SGMatrix<float64_t> test_feats_matrix;
SGVector<int32_t> * test_multilabels;
load_data(
test_file_name,
test_feats_matrix,
dim_feat,
num_samples,
test_multilabels,
num_classes);
CSparseFeatures<float64_t> * test_features = new CSparseFeatures<float64_t>(
test_feats_matrix);
SG_REF(test_features);
CMultilabelSOLabels * test_labels = new CMultilabelSOLabels(num_samples,
num_classes);
SG_REF(test_labels);
test_labels->set_sparse_labels(test_multilabels);
CStructuredLabels * out = CLabelsFactory::to_structured(
sgd->apply(test_features));
CStructuredLabels * bout = CLabelsFactory::to_structured(
bundle->apply(test_features));
CStructuredLabels * sout = CLabelsFactory::to_structured(
sosvm->apply(test_features));
CStructuredAccuracy * evaluator = new CStructuredAccuracy();
SG_REF(evaluator);
SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n",
sgd->get_name(), evaluator->evaluate(out, test_labels));
SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n",
bundle->get_name(), evaluator->evaluate(bout, test_labels));
SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n",
sosvm->get_name(), evaluator->evaluate(sout, test_labels));
SG_UNREF(bout);
SG_UNREF(bundle);
SG_UNREF(evaluator);
SG_UNREF(features);
SG_UNREF(mlabels);
SG_UNREF(model);
SG_UNREF(out);
SG_UNREF(sgd);
SG_UNREF(sosvm);
SG_UNREF(sout);
SG_UNREF(start);
SG_UNREF(test_features);
SG_UNREF(test_labels);
SG_FREE(multilabels);
SG_FREE(test_multilabels);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Saurabh Mahindre
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/LOOCrossValidationSplitting.h>
#include <shogun/labels/RegressionLabels.h>
using namespace shogun;
int main(int argc, char **argv)
{
init_shogun_with_defaults();
index_t num_labels;
index_t runs=10;
while (runs-->0)
{
num_labels=CMath::random(10, 50);
//SG_SPRINT("num_labels=%d\n\n", num_labels);
/* build labels */
CRegressionLabels* labels=new CRegressionLabels(num_labels);
for (index_t i=0; i<num_labels; ++i)
{
labels->set_label(i, CMath::random(-10.0, 10.0));
// SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
}
//SG_SPRINT("\n");
/* build Leave one out splitting strategy */
CLOOCrossValidationSplitting* splitting=
new CLOOCrossValidationSplitting(labels);
splitting->build_subsets();
for (index_t i=0; i<num_labels; ++i)
{
//SG_SPRINT("subset %d\n", i);
SGVector<index_t> subset=splitting->generate_subset_indices(i);
SGVector<index_t> inverse=splitting->generate_subset_inverse(i);
SGVector<index_t>::display_vector(subset.vector, subset.vlen, "subset indices");
SGVector<index_t>::display_vector(inverse.vector, inverse.vlen, "inverse indices");
/*for (index_t j=0; j<subset.vlen; ++j)
SG_SPRINT("%d:(%f),", subset.vector[j], labels->get_label(subset.vector[j]));
SG_SPRINT("\n");
SG_SPRINT("inverse %d\n", i);
for (index_t j=0; j<inverse.vlen; ++j)
SG_SPRINT("%d(%d),", inverse.vector[j],
(int32_t)labels->get_label(inverse.vector[j]));
SG_SPRINT("\n\n");
*/
}
/* clean up */
SG_UNREF(splitting);
}
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/labels/RegressionLabels.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
index_t num_labels;
index_t num_subsets;
index_t runs=100;
while (runs-->0)
{
num_labels=CMath::random(10, 150);
num_subsets=CMath::random(1, 5);
index_t desired_size=CMath::round(
(float64_t)num_labels/(float64_t)num_subsets);
/* this will throw an error */
if (num_labels<num_subsets)
continue;
SG_SPRINT("num_labels=%d\nnum_subsets=%d\n\n", num_labels, num_subsets);
/* build labels */
CRegressionLabels* labels=new CRegressionLabels(num_labels);
for (index_t i=0; i<num_labels; ++i)
{
labels->set_label(i, CMath::random(-10.0, 10.0));
SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
}
SG_SPRINT("\n");
/* build splitting strategy */
CCrossValidationSplitting* splitting=
new CCrossValidationSplitting(labels, num_subsets);
/* build index sets (twice to ensure memory is not leaking) */
splitting->build_subsets();
splitting->build_subsets();
for (index_t i=0; i<num_subsets; ++i)
{
SG_SPRINT("subset %d\n", i);
SGVector<index_t> subset=splitting->generate_subset_indices(i);
SGVector<index_t> inverse=splitting->generate_subset_inverse(i);
SGVector<index_t>::display_vector(subset.vector, subset.vlen, "subset indices");
SGVector<index_t>::display_vector(inverse.vector, inverse.vlen, "inverse indices");
SG_SPRINT("checking subset size: %d vs subset desired size %d\n",
subset.vlen, desired_size);
ASSERT(CMath::abs(subset.vlen-desired_size)<=1);
ASSERT(subset.vlen+inverse.vlen==num_labels);
for (index_t j=0; j<subset.vlen; ++j)
SG_SPRINT("%d:(%f),", subset.vector[j], labels->get_label(j));
SG_SPRINT("\n");
SG_SPRINT("inverse %d\n", i);
for (index_t j=0; j<inverse.vlen; ++j)
SG_SPRINT("%d(%d),", inverse.vector[j],
(int32_t)labels->get_label(j));
SG_SPRINT("\n\n");
}
/* clean up */
SG_UNREF(splitting);
}
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Heiko Strathmann
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*/
#include <shogun/base/init.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/labels/MulticlassLabels.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char **argv)
{
init_shogun(&print_message, &print_message, &print_message);
index_t num_labels, num_classes, num_subsets;
index_t runs=50;
while (runs-->0)
{
num_labels=CMath::random(5, 100);
num_classes=CMath::random(2, 10);
num_subsets=CMath::random(1, 10);
/* this will throw an error */
if (num_labels<num_subsets)
continue;
SG_SPRINT("num_labels=%d\nnum_classes=%d\nnum_subsets=%d\n\n",
num_labels, num_classes, num_subsets);
/* build labels */
CMulticlassLabels* labels=new CMulticlassLabels(num_labels);
for (index_t i=0; i<num_labels; ++i)
{
labels->set_label(i, CMath::random()%num_classes);
SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
}
SG_SPRINT("\n");
/* print classes */
SGVector<float64_t> classes=labels->get_unique_labels();
SGVector<float64_t>::display_vector(classes.vector, classes.vlen, "classes");
/* build splitting strategy */
CStratifiedCrossValidationSplitting* splitting=
new CStratifiedCrossValidationSplitting(labels, num_subsets);
/* build index sets (twice to ensure memory is not leaking) */
splitting->build_subsets();
splitting->build_subsets();
for (index_t i=0; i<num_subsets; ++i)
{
SGVector<index_t> subset=splitting->generate_subset_indices(i);
SGVector<index_t> inverse=splitting->generate_subset_inverse(i);
SG_SPRINT("subset %d\n", i);
for (index_t j=0; j<subset.vlen; ++j)
SG_SPRINT("%d(%d),", subset.vector[j],
(int32_t)labels->get_label(j));
SG_SPRINT("\n");
SG_SPRINT("inverse %d\n", i);
for (index_t j=0; j<inverse.vlen; ++j)
SG_SPRINT("%d(%d),", inverse.vector[j],
(int32_t)labels->get_label(j));
SG_SPRINT("\n\n");
}
/* check whether number of labels in every subset is nearly equal */
for (index_t i=0; i<num_classes; ++i)
{
SG_SPRINT("checking class %d\n", i);
/* count number of elements for this class */
SGVector<index_t> temp=splitting->generate_subset_indices(0);
int32_t count=0;
for (index_t j=0; j<temp.vlen; ++j)
{
if ((int32_t)labels->get_label(temp.vector[j])==i)
++count;
}
/* check all subsets for same ratio */
for (index_t j=0; j<num_subsets; ++j)
{
SGVector<index_t> subset=splitting->generate_subset_indices(j);
int32_t temp_count=0;
for (index_t k=0; k<subset.vlen; ++k)
{
if ((int32_t)labels->get_label(subset.vector[k])==i)
++temp_count;
}
/* at most one difference */
SG_SPRINT("number in subset %d: %d\n", j, temp_count);
ASSERT(CMath::abs(temp_count-count)<=1);
}
}
/* clean up */
SG_UNREF(splitting);
}
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/statistics/HSIC.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>
using namespace shogun;
void create_fixed_data_kernel_small(CFeatures*& features_p,
CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q)
{
index_t m=2;
index_t d=3;
SGMatrix<float64_t> p(d,2*m);
for (index_t i=0; i<2*d*m; ++i)
p.matrix[i]=i;
// p.display_matrix("p");
SGMatrix<float64_t> q(d,2*m);
for (index_t i=0; i<2*d*m; ++i)
q.matrix[i]=i+10;
// q.display_matrix("q");
features_p=new CDenseFeatures<float64_t>(p);
features_q=new CDenseFeatures<float64_t>(q);
float64_t sigma_x=2;
float64_t sigma_y=3;
float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;
/* shoguns kernel width is different */
kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
}
void create_fixed_data_kernel_big(CFeatures*& features_p,
CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q)
{
index_t m=10;
index_t d=7;
SGMatrix<float64_t> p(d,m);
for (index_t i=0; i<d*m; ++i)
p.matrix[i]=(i+8)%3;
// p.display_matrix("p");
SGMatrix<float64_t> q(d,m);
for (index_t i=0; i<d*m; ++i)
q.matrix[i]=((i+10)*(i%4+2))%4;
// q.display_matrix("q");
features_p=new CDenseFeatures<float64_t>(p);
features_q=new CDenseFeatures<float64_t>(q);
float64_t sigma_x=2;
float64_t sigma_y=3;
float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;
/* shoguns kernel width is different */
kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
}
/** tests the hsic statistic for a single fixed data case and ensures
* equality with sma implementation */
void test_hsic_fixed()
{
CFeatures* features_p=NULL;
CFeatures* features_q=NULL;
CKernel* kernel_p=NULL;
CKernel* kernel_q=NULL;
create_fixed_data_kernel_small(features_p, features_q, kernel_p, kernel_q);
index_t m=features_p->get_num_vectors();
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
/* assert matlab result, note that compute statistic computes m*hsic */
float64_t difference=hsic->compute_statistic();
SG_SPRINT("hsic fixed: %f\n", difference);
ASSERT(CMath::abs(difference-m*0.164761446385339)<10E-16);
SG_UNREF(hsic);
}
void test_hsic_gamma()
{
CFeatures* features_p=NULL;
CFeatures* features_q=NULL;
CKernel* kernel_p=NULL;
CKernel* kernel_q=NULL;
create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
hsic->set_null_approximation_method(HSIC_GAMMA);
float64_t p=hsic->compute_p_value(0.05);
SG_SPRINT("p-value: %f\n", p);
// disabled as I think previous inverse_gamma_cdf was faulty
// now unit test fails. Needs to be investigated statistically
//ASSERT(CMath::abs(p-0.172182287884256)<10E-15);
SG_UNREF(hsic);
}
void test_hsic_sample_null()
{
CFeatures* features_p=NULL;
CFeatures* features_q=NULL;
CKernel* kernel_p=NULL;
CKernel* kernel_q=NULL;
create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);
CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
/* do sampling null */
hsic->set_null_approximation_method(PERMUTATION);
float64_t p=hsic->compute_p_value(0.05);
SG_SPRINT("p-value: %f\n", p);
/* ensure that sampling null of hsic leads to same results as using
* CKernelIndependenceTest */
CMath::init_random(1);
float64_t mean1=CStatistics::mean(hsic->sample_null());
float64_t var1=CStatistics::variance(hsic->sample_null());
SG_SPRINT("mean1=%f, var1=%f\n", mean1, var1);
CMath::init_random(1);
float64_t mean2=CStatistics::mean(
hsic->CKernelIndependenceTest::sample_null());
float64_t var2=CStatistics::variance(hsic->sample_null());
SG_SPRINT("mean2=%f, var2=%f\n", mean2, var2);
/* assert than results are the same from bot sampling null impl. */
ASSERT(CMath::abs(mean1-mean2)<10E-8);
ASSERT(CMath::abs(var1-var2)<10E-8);
SG_UNREF(hsic);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
test_hsic_fixed();
test_hsic_gamma();
test_hsic_sample_null();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/mathematics/Statistics.h>
using namespace shogun;
void linear_time_mmd()
{
/* note that the linear time statistic is designed for much larger datasets
* so increase to get reasonable results */
index_t m=1000;
index_t dim=2;
float64_t difference=0.5;
/* streaming data generator for mean shift distributions */
CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim);
CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim);
/* set kernel a-priori. usually one would do some kernel selection. See
* other examples for this. */
float64_t width=10;
CGaussianKernel* kernel=new CGaussianKernel(10, width);
/* create linear time mmd instance */
index_t blocksize=1000;
CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m, blocksize);
/* perform test: compute p-value and test if null-hypothesis is rejected for
* a test level of 0.05 */
float64_t alpha=0.05;
/* using bootstrapping (not reccomended for linear time MMD, since slow).
* Also, in practice, use at least 250 iterations */
mmd->set_null_approximation_method(PERMUTATION);
mmd->set_num_null_samples(10);
float64_t p_value_bootstrap=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("bootstrap: p!=q: %d\n", p_value_bootstrap<alpha);
/* using Gaussian approximation (use large sample size, check type I error).
* Also, in practice, use at least 250 iterations */
mmd->set_null_approximation_method(MMD1_GAUSSIAN);
float64_t p_value_gaussian=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("gaussian approx: p!=q: %d\n", p_value_gaussian<alpha);
/* compute tpye I and II error (use many more trials in practice).
* Type I error is only estimated to check MMD1_GAUSSIAN method for
* estimating the null distribution. Note that testing has to happen on
* difference data than kernel selection, but the linear time mmd does this
* implicitly and we used a fixed kernel here. */
index_t num_trials=5;
SGVector<float64_t> typeIerrors(num_trials);
SGVector<float64_t> typeIIerrors(num_trials);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
mmd->set_simulate_h0(true);
typeIerrors[i]=mmd->perform_test()>alpha;
mmd->set_simulate_h0(false);
typeIIerrors[i]=mmd->perform_test()>alpha;
}
SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors));
SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors));
SG_UNREF(mmd);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
linear_time_mmd();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/statistics/MMDKernelSelectionCombOpt.h>
#include <shogun/statistics/MMDKernelSelectionCombMaxL2.h>
#endif //USE_GPL_SHOGUN
#include <shogun/statistics/MMDKernelSelectionOpt.h>
#include <shogun/statistics/MMDKernelSelectionMax.h>
#include <shogun/statistics/MMDKernelSelectionMedian.h>
#include <shogun/features/streaming/StreamingFeatures.h>
#include <shogun/features/streaming/generators/GaussianBlobsDataGenerator.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/mathematics/Statistics.h>
using namespace shogun;
void kernel_choice_linear_time_mmd_opt_single()
{
/* Note that the linear time mmd is designed for large datasets. Results on
* this small number will be bad (unstable, type I error wrong) */
index_t m=1000;
index_t num_blobs=3;
float64_t distance=3;
float64_t stretch=10;
float64_t angle=CMath::PI/4;
CGaussianBlobsDataGenerator* gen_p=new CGaussianBlobsDataGenerator(
num_blobs, distance, stretch, angle);
CGaussianBlobsDataGenerator* gen_q=new CGaussianBlobsDataGenerator(
num_blobs, distance, 1, 1);
/* create kernels */
CCombinedKernel* combined=new CCombinedKernel();
float64_t sigma_from=-3;
float64_t sigma_to=10;
float64_t sigma_step=1;
float64_t sigma=sigma_from;
while (sigma<=sigma_to)
{
/* shoguns kernel width is different */
float64_t width=CMath::pow(2.0, sigma);
float64_t sq_width_twice=width*width*2;
combined->append_kernel(new CGaussianKernel(10, sq_width_twice));
sigma+=sigma_step;
}
/* create MMD instance */
CLinearTimeMMD* mmd=new CLinearTimeMMD(combined, gen_p, gen_q, m);
/* kernel selection instance with regularisation term. May be replaced by
* other methods for selecting single kernels */
CMMDKernelSelectionOpt* selection=
new CMMDKernelSelectionOpt(mmd, 10E-5);
//
/* select kernel that maximised MMD */
// CMMDKernelSelectionMax* selection=
// new CMMDKernelSelectionMax(mmd);
// /* select kernel with width closest to median data distance */
// CMMDKernelSelectionMedian* selection=
// new CMMDKernelSelectionMedian(mmd, 10E-5);
/* compute measures.
* For Opt: ratio of MMD and standard deviation
* For Max: MMDs of single kernels
* for Medigan: Does not work! */
SG_SPRINT("computing ratios\n");
SGVector<float64_t> ratios=selection->compute_measures();
ratios.display_vector("ratios");
/* select kernel using the maximum ratio (and cast) */
SG_SPRINT("selecting kernel\n");
CKernel* selected=selection->select_kernel();
CGaussianKernel* casted=CGaussianKernel::obtain_from_generic(selected);
SG_SPRINT("selected kernel width: %f\n", casted->get_width());
mmd->set_kernel(selected);
SG_UNREF(casted);
SG_UNREF(selected);
mmd->set_null_approximation_method(MMD1_GAUSSIAN);
/* compute tpye I and II error (use many more trials). Type I error is only
* estimated to check MMD1_GAUSSIAN method for estimating the null
* distribution. Note that testing has to happen on difference data than
* kernel selecting, but the linear time mmd does this implicitly */
float64_t alpha=0.05;
index_t num_trials=5;
SGVector<float64_t> typeIerrors(num_trials);
SGVector<float64_t> typeIIerrors(num_trials);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
mmd->set_simulate_h0(true);
typeIerrors[i]=mmd->perform_test()>alpha;
mmd->set_simulate_h0(false);
typeIIerrors[i]=mmd->perform_test()>alpha;
}
SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors));
SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors));
SG_UNREF(selection);
}
void kernel_choice_linear_time_mmd_opt_comb()
{
#ifdef USE_GPL_SHOGUN
/* Note that the linear time mmd is designed for large datasets. Results on
* this small number will be bad (unstable, type I error wrong) */
index_t m=1000;
index_t num_blobs=3;
float64_t distance=3;
float64_t stretch=10;
float64_t angle=CMath::PI/4;
CGaussianBlobsDataGenerator* gen_p=new CGaussianBlobsDataGenerator(
num_blobs, distance, stretch, angle);
CGaussianBlobsDataGenerator* gen_q=new CGaussianBlobsDataGenerator(
num_blobs, distance, 1, 1);
/* create kernels */
CCombinedKernel* combined=new CCombinedKernel();
float64_t sigma_from=-3;
float64_t sigma_to=10;
float64_t sigma_step=1;
float64_t sigma=sigma_from;
index_t num_kernels=0;
while (sigma<=sigma_to)
{
/* shoguns kernel width is different */
float64_t width=CMath::pow(2.0, sigma);
float64_t sq_width_twice=width*width*2;
combined->append_kernel(new CGaussianKernel(10, sq_width_twice));
sigma+=sigma_step;
num_kernels++;
}
/* create MMD instance */
CLinearTimeMMD* mmd=new CLinearTimeMMD(combined, gen_p, gen_q, m);
/* kernel selection instance with regularisation term. May be replaced by
* other methods for selecting single kernels */
CMMDKernelSelectionCombOpt* selection=
new CMMDKernelSelectionCombOpt(mmd, 10E-5);
/* maximise L2 regularised MMD */
// CMMDKernelSelectionCombMaxL2* selection=
// new CMMDKernelSelectionCombMaxL2(mmd, 10E-5);
/* select kernel (does the same as above, but sets weights to kernel) */
SG_SPRINT("selecting kernel\n");
CKernel* selected=selection->select_kernel();
CCombinedKernel* casted=CCombinedKernel::obtain_from_generic(selected);
casted->get_subkernel_weights().display_vector("weights");
mmd->set_kernel(selected);
SG_UNREF(casted);
SG_UNREF(selected);
/* compute tpye I and II error (use many more trials). Type I error is only
* estimated to check MMD1_GAUSSIAN method for estimating the null
* distribution. Note that testing has to happen on difference data than
* kernel selecting, but the linear time mmd does this implicitly */
mmd->set_null_approximation_method(MMD1_GAUSSIAN);
float64_t alpha=0.05;
index_t num_trials=5;
SGVector<float64_t> typeIerrors(num_trials);
SGVector<float64_t> typeIIerrors(num_trials);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
mmd->set_simulate_h0(true);
typeIerrors[i]=mmd->perform_test()>alpha;
mmd->set_simulate_h0(false);
typeIIerrors[i]=mmd->perform_test()>alpha;
}
SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors));
SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors));
SG_UNREF(selection);
#endif //USE_GPL_SHOGUN
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
/* select a single kernel for linear time MMD */
kernel_choice_linear_time_mmd_opt_single();
/* select combined kernels for linear time MMD */
kernel_choice_linear_time_mmd_opt_comb();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Heiko Strathmann
*/
#include <shogun/base/init.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/mathematics/Statistics.h>
using namespace shogun;
void quadratic_time_mmd()
{
/* number of examples kept low in order to make things fast */
index_t m=30;
index_t dim=2;
float64_t difference=0.5;
/* streaming data generator for mean shift distributions */
CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim);
CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim);
/* stream some data from generator */
CFeatures* feat_p=gen_p->get_streamed_features(m);
CFeatures* feat_q=gen_q->get_streamed_features(m);
/* set kernel a-priori. usually one would do some kernel selection. See
* other examples for this. */
float64_t width=10;
CGaussianKernel* kernel=new CGaussianKernel(10, width);
/* create quadratic time mmd instance. Note that this constructor
* copies p and q and does not reference them */
CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q);
/* perform test: compute p-value and test if null-hypothesis is rejected for
* a test level of 0.05 */
float64_t alpha=0.05;
/* using permutation (slow, not the most reliable way. Consider pre-
* computing the kernel when using it, see below).
* Also, in practice, use at least 250 iterations */
mmd->set_null_approximation_method(PERMUTATION);
mmd->set_num_null_samples(3);
float64_t p_value=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("bootstrap: p!=q: %d\n", p_value<alpha);
/* using spectrum method. Use at least 250 samples from null.
* This is consistent but sometimes breaks, always monitor type I error.
* See tutorial for number of eigenvalues to use .
* Only works with BIASED statistic */
mmd->set_statistic_type(BIASED);
mmd->set_null_approximation_method(MMD2_SPECTRUM);
mmd->set_num_eigenvalues_spectrum(3);
mmd->set_num_samples_spectrum(250);
p_value=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("spectrum: p!=q: %d\n", p_value<alpha);
/* using gamma method. This is a quick hack, which works most of the time
* but is NOT guaranteed to. See tutorial for details.
* Only works with BIASED statistic */
mmd->set_statistic_type(BIASED);
mmd->set_null_approximation_method(MMD2_GAMMA);
p_value=mmd->perform_test();
/* reject if p-value is smaller than test level */
SG_SPRINT("gamma: p!=q: %d\n", p_value<alpha);
/* compute tpye I and II error (use many more trials in practice).
* Type I error is not necessary if one uses permutation. We do it here
* anyway, but note that this is an efficient way of computing it.
* Also note that testing has to happen on
* difference data than kernel selection, but the linear time mmd does this
* implicitly and we used a fixed kernel here. */
mmd->set_null_approximation_method(PERMUTATION);
mmd->set_num_null_samples(5);
index_t num_trials=5;
SGVector<float64_t> type_I_errors(num_trials);
SGVector<float64_t> type_II_errors(num_trials);
SGVector<index_t> inds(2*m);
inds.range_fill();
CFeatures* p_and_q=mmd->get_p_and_q();
/* use a precomputed kernel to be faster */
kernel->init(p_and_q, p_and_q);
CCustomKernel* precomputed=new CCustomKernel(kernel);
mmd->set_kernel(precomputed);
for (index_t i=0; i<num_trials; ++i)
{
/* this effectively means that p=q - rejecting is tpye I error */
CMath::permute(inds);
precomputed->add_row_subset(inds);
precomputed->add_col_subset(inds);
type_I_errors[i]=mmd->perform_test()>alpha;
precomputed->remove_row_subset();
precomputed->remove_col_subset();
/* on normal data, this gives type II error */
type_II_errors[i]=mmd->perform_test()>alpha;
}
SG_UNREF(p_and_q);
SG_SPRINT("type I error: %f\n", CStatistics::mean(type_I_errors));
SG_SPRINT("type II error: %f\n", CStatistics::mean(type_II_errors));
/* clean up */
SG_UNREF(mmd);
SG_UNREF(gen_p);
SG_UNREF(gen_q);
/* convienience constructor of MMD was used, these were not referenced */
SG_UNREF(feat_p);
SG_UNREF(feat_q);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// sg_io->set_loglevel(MSG_DEBUG);
quadratic_time_mmd();
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Heiko Strathmann
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This file demonstrates how a regular CDenseFeatures object can
* be used as input for the StreamingFeatures framework, effectively
* making it suitable for using online learning algorithms.
*/
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <stdlib.h>
#include <stdio.h>
using namespace shogun;
#define NUM 10
#define DIMS 2
#define DIST 0.5
void gen_rand_data(SGMatrix<float64_t> feat, SGVector<float64_t> lab)
{
for (int32_t i=0; i<NUM; i++)
{
if (i<NUM/2)
{
for (int32_t j=0; j<DIMS; j++)
feat[i*DIMS+j]=CMath::random(0.0, 1.0)+DIST;
if (lab.vector)
lab[i]=0;
}
else
{
for (int32_t j=0; j<DIMS; j++)
feat[i*DIMS+j]=CMath::random(0.0, 1.0)-DIST;
if (lab.vector)
lab[i]=1;
}
}
feat.display_matrix("feat");
lab.display_vector("lab");
}
void test_general()
{
SGMatrix<float64_t> feat(DIMS, NUM);
SGVector<float64_t> lab(NUM);
// Generate random data, features and labels
gen_rand_data(feat, lab);
// Create features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
SG_REF(features);
features->set_feature_matrix(feat);
// Create a StreamingDenseFeatures object which uses the above as input;
// labels (float64_t*) are optional
CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
float64_t>(features, lab);
SG_REF(streaming);
// Start parsing of the examples; in this case, it is trivial - returns each vector from the DenseFeatures object
streaming->start_parser();
int32_t counter=0;
SG_SPRINT("Processing examples...\n\n");
// Run a while loop over all the examples. Note that since
// features are "streaming", there is no predefined
// number_of_vectors known to the StreamingFeatures object.
// Thus, this loop must be used to iterate over all the
// features
while (streaming->get_next_example())
{
counter++;
// Get the current vector; no other vector is accessible
SGVector<float64_t> vec=streaming->get_vector();
float64_t label=streaming->get_label();
SG_SPRINT("Vector %d: [\t", counter);
for (int32_t i=0; i<vec.vlen; i++)
{
SG_SPRINT("%f\t", vec.vector[i]);
}
SG_SPRINT("Label=%f\t", label);
// Calculate dot product of the current vector (from
// the StreamingFeatures object) with itself (the
// vector passed as argument)
float64_t dot_prod=streaming->dense_dot(vec.vector, vec.vlen);
SG_SPRINT("]\nDot product of the vector with itself: %f", dot_prod);
SG_SPRINT("\n\n");
// Free the example, since we are done with processing it.
streaming->release_example();
}
// Now that all examples are used, end the parser.
streaming->end_parser();
SG_UNREF(streaming);
SG_UNREF(features);
}
void test_get_streamed_features()
{
/* create streaming features from dense features and then make call and
* assert that data is equal */
SGMatrix<float64_t> feat(DIMS, NUM);
SGVector<float64_t> lab(NUM);
// Generate random data, features and labels
gen_rand_data(feat, lab);
// Create features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
SG_REF(features);
features->set_feature_matrix(feat);
// Create a StreamingDenseFeatures object which uses the above as input;
// labels (float64_t*) are optional
CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
float64_t>(features, lab);
SG_REF(streaming);
streaming->start_parser();
CDenseFeatures<float64_t>* dense=
(CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM);
streaming->end_parser();
/* assert that matrices are equal */
ASSERT(dense->get_feature_matrix().equals(feat));
SG_UNREF(dense);
SG_UNREF(features);
SG_UNREF(streaming);
}
void test_get_streamed_features_too_many()
{
/* create streaming features from dense features and then make call and
* assert that data is equal. requests more data than available */
SGMatrix<float64_t> feat(DIMS, NUM);
SGVector<float64_t> lab(NUM);
// Generate random data, features and labels
gen_rand_data(feat, lab);
// Create features
CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
SG_REF(features);
features->set_feature_matrix(feat);
// Create a StreamingDenseFeatures object which uses the above as input;
// labels (float64_t*) are optional
CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
float64_t>(features, lab);
SG_REF(streaming);
streaming->start_parser();
/* request more features than available */
CDenseFeatures<float64_t>* dense=
(CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM+10);
streaming->end_parser();
/* assert that matrices are equal */
ASSERT(dense->get_feature_matrix().equals(feat));
SG_UNREF(dense);
SG_UNREF(features);
SG_UNREF(streaming);
}
int main()
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
test_general();
test_get_streamed_features();
test_get_streamed_features_too_many();
//
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of the online variant of SGD which
* relies on the streaming features framework.
*/
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/classifier/svm/OnlineLibLinear.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
// Create a StreamingAsciiFile from the training data
const char* train_file_name = "../data/train_densereal.light";
CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
SG_REF(train_file);
// The bool value is true if examples are labelled.
// 1024 is a good standard value for the number of examples for the parser to hold at a time.
CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
SG_REF(train_features);
// Create an OnlineLiblinear object from the features. The first parameter is 'C'.
COnlineLibLinear* svm = new COnlineLibLinear(1, train_features);
svm->set_bias_enabled(false); // Enable/disable bias
svm->train(); // Train
train_file->close();
// Now we want to test on other data
const char* test_file_name = "../data/fm_test_densereal.dat";
CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
// Similar, but 'false' since the file contains unlabelled examples
CStreamingDenseFeatures<float64_t>* test_features = new CStreamingDenseFeatures<float64_t>(test_file, false, 1024);
SG_REF(test_features);
// Apply on all examples and return a CLabels*
CRegressionLabels* test_labels = svm->apply_regression(test_features);
for (int32_t i=0; i<test_labels->get_num_labels(); i++)
SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));
SG_UNREF(test_features);
SG_UNREF(test_labels);
SG_UNREF(test_file);
SG_UNREF(train_features);
SG_UNREF(train_file);
SG_UNREF(svm);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2013 Thoralf Klein
* Copyright (C) 2013 Zuse-Institute-Berlin
*
* This example demonstrates use of the online learning with
* OnlineLibLinear using sparse streaming features. This example
* also parses command line options: Can be used as stand-alone
* program to do binary classifications on user-provided inputs.
*/
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/Time.h>
#include <shogun/classifier/svm/OnlineLibLinear.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
using namespace shogun;
int main(int argc, char* argv[])
{
init_shogun_with_defaults();
float64_t C = 1.0;
char *train_file_name = (char*)"../data/train_sparsereal.light";
char *test_file_name = (char*)"../data/test_sparsereal.light";
char filename_tmp[] = "test_sparsereal.light.labels.XXXXXX";
int fd = mkstemp(filename_tmp);
ASSERT(fd != -1);
int retval = close(fd);
ASSERT(retval != -1);
char *test_labels_file_name = filename_tmp;
if (argc > 4) {
int32_t idx = 1;
C = atof(argv[idx++]);
train_file_name = argv[idx++];
test_file_name = argv[idx++];
test_labels_file_name = argv[idx++];
ASSERT(idx <= argc);
}
fprintf(stderr, "*** training file %s with C %g\n", train_file_name, C);
// Create an OnlineLiblinear object from the features. The first parameter is 'C'.
COnlineLibLinear *svm = new COnlineLibLinear(C);
svm->set_bias_enabled(true);
{
CTime train_time;
train_time.start();
// Create a StreamingAsciiFile from the training data
CStreamingAsciiFile *train_file = new CStreamingAsciiFile(train_file_name);
SG_REF(train_file);
// The bool value is true if examples are labelled.
// 1024 is a good standard value for the number of examples for the parser to hold at a time.
CStreamingSparseFeatures < float32_t > *train_features =
new CStreamingSparseFeatures < float32_t > (train_file, true, 1024);
SG_REF(train_features);
svm->set_features(train_features);
svm->train();
train_file->close();
SG_UNREF(train_file);
SG_UNREF(train_features);
train_time.stop();
SGVector<float32_t> w_now = svm->get_w().clone();
float32_t w_now_norm = SGVector<float32_t>::twonorm(w_now.vector, w_now.vlen);
uint64_t train_time_int = train_time.cur_time_diff();
fprintf(stderr,
"*** total training time: %llum%llus (or %.1f sec), #dim = %d, ||w|| = %f\n",
train_time_int / 60, train_time_int % 60, train_time.cur_time_diff(),
w_now.vlen, w_now_norm
);
}
{
CTime test_time;
test_time.start();
// Now we want to test on holdout data
CStreamingAsciiFile *test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
// Set second parameter to 'false' if the file contains unlabelled examples
CStreamingSparseFeatures < float32_t > *test_features =
new CStreamingSparseFeatures < float32_t > (test_file, true, 1024);
SG_REF(test_features);
// Apply on all examples and return a CBinaryLabels*
CBinaryLabels *test_binary_labels = svm->apply_binary(test_features);
SG_REF(test_binary_labels);
test_time.stop();
uint64_t test_time_int = test_time.cur_time_diff();
fprintf(stderr, "*** testing took %llum%llus (or %.1f sec)\n",
test_time_int / 60, test_time_int % 60, test_time.cur_time_diff());
SG_UNREF(test_features);
SG_UNREF(test_file);
// Writing labels for evaluation
fprintf(stderr, "*** writing labels to file %s\n", test_labels_file_name);
FILE* fh = fopen(test_labels_file_name, "wb");
ASSERT(fh);
for (int32_t j = 0; j < test_binary_labels->get_num_labels(); j++)
fprintf(fh, "%d\n", test_binary_labels->get_int_label(j));
fclose(fh);
SG_UNREF(test_binary_labels);
unlink(test_labels_file_name);
}
SG_UNREF(svm);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of the online variant of SGD which
* relies on the streaming features framework.
*/
#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingSparseFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
// Create a StreamingAsciiFile from the training data
char* train_file_name = "../data/train_sparsereal.light";
CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
SG_REF(train_file);
// Create a StreamingSparseFeatures from the StreamingAsciiFile.
// The bool value is true if examples are labelled.
// 1024 is a good standard value for the number of examples for the parser to hold at a time.
CStreamingSparseFeatures<float64_t>* train_features = new CStreamingSparseFeatures<float64_t>(train_file, true, 1024);
SG_REF(train_features);
// Create an OnlineSVMSGD object from the features. The first parameter is 'C'.
COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
sgd->set_bias_enabled(false); // Enable/disable bias
sgd->set_lambda(0.1); // Choose lambda
sgd->train(); // Train
train_file->close();
// Now we want to test on other data
char* test_file_name = "../data/fm_test_sparsereal.dat";
CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
SG_REF(test_file);
// Similar, but 'false' since the file contains unlabelled examples
CStreamingSparseFeatures<float64_t>* test_features = new CStreamingSparseFeatures<float64_t>(test_file, false, 1024);
SG_REF(test_features);
// Apply on all examples and return a CLabels*
CLabels* test_labels = sgd->apply(test_features);
for (int32_t i=0; i<test_labels->get_num_labels(); i++)
SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));
SG_UNREF(test_features);
SG_UNREF(test_file);
SG_UNREF(train_features);
SG_UNREF(train_file);
SG_UNREF(sgd);
exit_shogun();
return 0;
}
// This example simply demonstrates the use/working of StreamingStringFeatures
#include <shogun/lib/common.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGString.h>
#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingStringFeatures.h>
using namespace shogun;
void display_vector(const SGString<char> &vec)
{
printf("\nNew Vector\n------------------\n");
printf("Length=%d.\n", vec.slen);
for (int32_t i=0; i<vec.slen; i++)
{
printf("%c", vec.string[i]);
}
printf("\n");
}
int main(int argc, char **argv)
{
init_shogun_with_defaults();
// Create a StreamingAsciiFile from our input file
CStreamingAsciiFile* file = new CStreamingAsciiFile("../data/fm_train_dna.dat");
SG_REF(file);
// This file contains unlabelled data, so the second arg is `false'.
CStreamingStringFeatures<char>* feat = new CStreamingStringFeatures<char>(file, false, 1024);
SG_REF(feat);
// Alphabet to use is DNA
feat->use_alphabet(DNA);
// Loop over all examples and simply display each example
feat->start_parser();
while (feat->get_next_example())
{
SGString<char> vec = feat->get_vector();
display_vector(vec);
feat->release_example();
}
feat->end_parser();
// Get the alphabet and display the histogram
CAlphabet* alpha = feat->get_alphabet();
printf("\nThe histogram is:\n");
alpha->print_histogram();
SG_UNREF(alpha);
SG_UNREF(feat);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of the Vowpal Wabbit learning algorithm.
*/
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingVwFile.h>
#include <shogun/features/streaming/StreamingVwFeatures.h>
#include <shogun/classifier/vw/VowpalWabbit.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
const char* train_file_name = "../data/train_sparsereal.light";
CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
SG_REF(train_file);
CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
SG_REF(train_features);
CVowpalWabbit* vw = new CVowpalWabbit(train_features);
vw->set_regressor_out("./vw_regressor_text.dat"); // Save regressor to this file
vw->set_adaptive(false); // Use adaptive learning
vw->train_machine();
SG_SPRINT("Weights have been output in text form to vw_regressor_text.dat.\n");
train_file->close();
CStreamingVwFile* test_file = new CStreamingVwFile(train_file_name);
test_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, true, 1024);
test_features->start_parser();
while (test_features->get_next_example())
{
VwExample *example = test_features->get_example();
float64_t pred = vw->predict_and_finalize(example);
printf("%.2lf\n", pred);
test_features->release_example();
}
test_features->end_parser();
test_file->close();
SG_UNREF(train_features);
SG_UNREF(train_file);
SG_UNREF(vw);
SG_UNREF(test_features);
SG_UNREF(test_file);
exit_shogun();
return 0;
}
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2011 Shashwat Lal Das
* Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
*
* This example demonstrates use of online SGD with CStreamingVwFeatures
* as the features object.
*/
#include <shogun/lib/common.h>
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingVwFile.h>
#include <shogun/features/streaming/StreamingVwFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
const char* train_file_name = "../data/train_sparsereal.light";
CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
SG_REF(train_file);
CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
SG_REF(train_features);
COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
sgd->set_bias_enabled(false);
sgd->set_lambda(0.1);
sgd->train();
// Now we want to test on other data
const char* test_file_name = "../data/fm_test_sparsereal.dat";
CStreamingVwFile* test_file = new CStreamingVwFile(test_file_name);
test_file->set_parser_type(T_SVMLIGHT);
SG_REF(test_file);
//Similar, but 'false' since the file contains unlabelled examples
CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, false, 1024);
SG_REF(test_features);
// Apply on all examples and return a CLabels*
CBinaryLabels* test_labels = sgd->apply_binary(test_features);
for (int32_t i=0; i<test_labels->get_num_labels(); i++)
SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));
SG_UNREF(test_features);
SG_UNREF(sgd);
SG_UNREF(train_features);
SG_UNREF(test_labels);
exit_shogun();
return 0;
}
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StateModelTypes.h>
#include <shogun/features/MatrixFeatures.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
float64_t features_dat[] = {0,1,1, 2,1,2, 0,1,0, 0,2,2};
SGMatrix<float64_t> features_mat(features_dat,1,12,false);
CMatrixFeatures<float64_t>* features = new CMatrixFeatures<float64_t>(features_mat,3,4);
int32_t labels_dat[] = {0,0,0, 1,1,1, 0,0,0, 1,1,1};
SGVector<int32_t> labels_vec(labels_dat,12,false);
CSequenceLabels* labels = new CSequenceLabels(labels_vec,3,4,2);
labels->io->set_loglevel(MSG_DEBUG);
CHMSVMModel* model = new CHMSVMModel(features, labels, SMT_TWO_STATE, 3);
CDualLibQPBMSOSVM* sosvm = new CDualLibQPBMSOSVM(model, labels, 5000,0);
sosvm->train();
SG_UNREF(sosvm);
exit_shogun();
return 0;
}
#include <shogun/labels/StructuredLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/TwoStateModel.h>
using namespace shogun;
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
#ifdef USE_MOSEK
int32_t num_examples = 10;
int32_t example_length = 250;
int32_t num_features = 10;
int32_t num_noise_features = 2;
CHMSVMModel* model = CTwoStateModel::simulate_data(num_examples, example_length, num_features, num_noise_features);
CStructuredLabels* labels = model->get_labels();
CFeatures* features = model->get_features();
CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels);
SG_REF(sosvm);
sosvm->train();
// sosvm->get_w().display_vector("w");
CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply());
ASSERT( out->get_num_labels() == labels->get_num_labels() );
for ( int32_t i = 0 ; i < out->get_num_labels() ; ++i )
{
CSequence* pred_seq = CSequence::obtain_from_generic( out->get_label(i) );
CSequence* true_seq = CSequence::obtain_from_generic( labels->get_label(i) );
SG_UNREF(pred_seq);
SG_UNREF(true_seq);
}
SG_UNREF(out);
SG_UNREF(features); // because model->get_features() increased the count
SG_UNREF(labels); // because model->get_labels() increased the count
SG_UNREF(sosvm);
#endif /* USE_MOSEK */
exit_shogun();
return 0;
}
#include <shogun/structure/TwoStateModel.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
using namespace shogun;
int main()
{
init_shogun_with_defaults();
CTwoStateModel* tsm = new CTwoStateModel();
CHMSVMModel* model = tsm->simulate_data(100,250,3,1);
CStructuredLabels* labels = model->get_labels();
CDualLibQPBMSOSVM* sosvm = new CDualLibQPBMSOSVM(model, labels, 5000.0);
sosvm->train();
SG_UNREF(sosvm);
SG_UNREF(labels);
SG_UNREF(tsm);
exit_shogun();
return 0;
}
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLeastSquaresRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun(&print_message);
// create some data
SGMatrix<float64_t> matrix(2,4);
for (int32_t i=0; i<2*4; i++)
matrix.matrix[i]=i;
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
// create three labels
CRegressionLabels* labels=new CRegressionLabels(4);
labels->set_label(0, -1.4);
labels->set_label(1, +1.5);
labels->set_label(2, -1.2);
labels->set_label(3, +1.1);
CTask* first_task = new CTask(0,2);
CTask* second_task = new CTask(2,4);
CTaskGroup* task_group = new CTaskGroup();
task_group->append_task(first_task);
task_group->append_task(second_task);
CMultitaskLeastSquaresRegression* regressor = new CMultitaskLeastSquaresRegression(0.5,features,labels,task_group);
regressor->train();
regressor->set_current_task(0);
regressor->get_w().display_vector();
SG_UNREF(regressor);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLogisticRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
using namespace shogun;
void print_message(FILE* target, const char* str)
{
fprintf(target, "%s", str);
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
// create some data
SGMatrix<float64_t> matrix(2,4);
for (int32_t i=0; i<2*4; i++)
matrix.matrix[i]=i;
CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);
// create three labels
CBinaryLabels* labels=new CBinaryLabels(4);
labels->set_label(0, -1);
labels->set_label(1, +1);
labels->set_label(2, -1);
labels->set_label(3, +1);
CTask* first_task = new CTask(0,2);
CTask* second_task = new CTask(2,4);
CTaskGroup* task_group = new CTaskGroup();
task_group->append_task(first_task);
task_group->append_task(second_task);
CMultitaskLogisticRegression* regressor = new CMultitaskLogisticRegression(0.5,features,labels,task_group);
regressor->train();
regressor->set_current_task(0);
regressor->get_w().display_vector();
CTask* root_task = new CTask(0,4);
root_task->add_subtask(first_task);
root_task->add_subtask(second_task);
CTaskTree* task_tree = new CTaskTree(root_task);
regressor->set_task_relation(task_tree);
regressor->train();
regressor->set_current_task(0);
regressor->get_w().display_vector();
SG_UNREF(regressor);
exit_shogun();
return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
return 0;
}
#endif //USE_GPL_SHOGUN
/*
* Copyright (c) The Shogun Machine Learning Toolbox
* Written (w) 2014 Wu Lin
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation are those
* of the authors and should not be interpreted as representing official policies,
* either expressed or implied, of the Shogun Development Team.
*
* Code adapted from
* https://github.com/emtiyaz/VariationalApproxExample
* and the reference paper is
* Marlin, Benjamin M., Mohammad Emtiyaz Khan, and Kevin P. Murphy.
* "Piecewise Bounds for Estimating Bernoulli-Logistic Latent Gaussian Models." ICML. 2011.
*
* This code specifically adapted from example.m and simpleVariational.m
*/
// Eigen3 is required for working with this example
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/machine/gp/LogitVGPiecewiseBoundLikelihood.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/optimization/lbfgs/lbfgs.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/io/CSVFile.h>
#include <cstdio>
using namespace shogun;
//init the variational Piecewise bound
SGMatrix<float64_t> init_piecewise_bound(const char * fname)
{
SGMatrix<float64_t> bound;
CCSVFile* bound_file = new CCSVFile(fname);
bound_file->set_delimiter('\t');
bound.load(bound_file);
SG_UNREF(bound_file);
return bound;
}
//The following pre-init value is used to verify the correctness
//The following code will be removed.
SGVector<float64_t> load_m_from_matlab(const char * fname)
{
SGVector<float64_t> m_from_matlab;
CCSVFile* m_file = new CCSVFile(fname);
m_file->set_delimiter('\t');
m_from_matlab.load(m_file);
SG_UNREF(m_file);
return m_from_matlab;
}
//The following pre-init value is used to verify the correctness
//The following code will be removed.
float64_t load_loglik_from_matlab(const char * fname)
{
SGVector<float64_t> f_from_matlab;
CCSVFile* f_file = new CCSVFile(fname);
f_file->set_delimiter('\t');
f_from_matlab.load(f_file);
SG_UNREF(f_file);
REQUIRE(f_from_matlab.vlen == 1, "logLik is a scalar");
return f_from_matlab[0];
}
//Randomly generating the input feature (X)
SGMatrix<float64_t> create_feature(const char *fname, index_t num_sample,
index_t num_dim)
{
REQUIRE(num_sample % 2 == 0, "For this example we assume the num_sample is even");
/*
//X = [5*rand(N/2,D); -5*rand(N/2,D)];
//The following code is used to generate synthetic data
SGMatrix<float64_t> X(num_sample,num_dim);
for(index_t i = 0; i < num_sample; i++)
{
for(index_t j = 0; j < num_dim; j++)
{
if (i < num_sample/2)
X(i, j) = CMath::random(0,1)*5.0;
else
X(i, j) = CMath::random(0,1)*-5.0;
}
}
*/
//The following pre-init value is used to verify the correctness
//The following code will be removed.
SGMatrix<float64_t> X;
CCSVFile* X_file = new CCSVFile(fname);
X_file->set_delimiter('\t');
X.load(X_file);
SG_UNREF(X_file);
return X;
}
//Randomly generating the observated labels (y) followed by Guassian distribution (synthetic data)
SGVector<float64_t> create_label(const char * fname, SGVector<float64_t> mu,
SGMatrix<float64_t> sigma)
{
REQUIRE(sigma.num_rows == sigma.num_cols, "Sigma should be a covariance (square) matrix");
REQUIRE(sigma.num_rows == mu.vlen, "Sigma and mu should have the same dimensionality");
/*
//The following code is used to generate synthetic data
index_t num_sample = sigma.num_rows;
SGVector<float64_t> y(num_sample);
Eigen::Map<Eigen::MatrixXd> eigen_sigma(sigma.matrix, sigma.num_rows, sigma.num_cols);
//y = mvnrnd(mu, Sigma, 1);
CProbabilityDistribution * dist = new CGaussianDistribution(mu, sigma);
y = dist->sample();
//y = (y(:)>0);
//Note that Shogun uses -1 and 1 as labels
for( index_t i = 0; i < y.vlen; ++i)
{
if (y[i] > 0)
y[i] = 1;
else
y[i] = -1;
}
SG_UNREF(dist);
*/
//The following pre-init value is used to verify the correctness
//The following code will be removed.
//Note that Shogun uses -1 and 1 as labels
SGVector<float64_t> y;
CCSVFile* y_file = new CCSVFile(fname);
y_file->set_delimiter('\t');
y.load(y_file);
SG_UNREF(y_file);
for(index_t i = 0; i < y.vlen; i++)
{
if (y[i] > 0)
y[i] = 1;
else
y[i] = -1;
}
REQUIRE(y.vlen == mu.vlen,
"The labels loaded from the file should have the same dimensionality of mu");
return y;
}
//The following struct is used to pass information when using the build-in L-BFGS component
struct Shared
{
CLogitVGPiecewiseBoundLikelihood *lik;
SGVector<float64_t> y;
SGVector<float64_t> mu;
lbfgs_parameter_t lbfgs_param;
SGVector<float64_t> m0;
SGVector<float64_t> v;
SGMatrix<float64_t> sigma;
SGMatrix<float64_t> data;
SGMatrix<float64_t> bound;
Eigen::LDLT<Eigen::MatrixXd> ldlt;
};
//Init the parameters used for L-BFGS
lbfgs_parameter_t inti_lbfgs_parameters()
{
lbfgs_parameter_t tmp;
tmp.m = 100;
tmp.max_linesearch = 1000;
tmp.linesearch = LBFGS_LINESEARCH_DEFAULT;
tmp.max_iterations = 1000;
tmp.delta = 1e-15;
tmp.past = 0;
tmp.epsilon = 1e-15;
tmp.min_step = 1e-20;
tmp.max_step = 1e+20;
tmp.ftol = 1e-4;
tmp.wolfe = 0.9;
tmp.gtol = 0.9;
tmp.xtol = 1e-16;
tmp.orthantwise_c = 0;
tmp.orthantwise_start = 0;
tmp.orthantwise_end = 1;
return tmp;
}
//This function is similar to the Matlab code, simpleVariational.m
float64_t evaluate(void *obj, const float64_t *variable, float64_t *gradient,
const int dim, const float64_t step)
{
Shared * obj_prt = static_cast<Shared *>(obj);
CBinaryLabels lab(obj_prt->y);
obj_prt->lik->set_variational_distribution(obj_prt->m0, obj_prt->v, &lab);
Eigen::Map<Eigen::VectorXd> eigen_mu(obj_prt->mu.vector, obj_prt->mu.vlen);
Eigen::Map<Eigen::VectorXd> eigen_m(obj_prt->m0.vector, obj_prt->m0.vlen);
//[fi, gmi, gvi] = ElogLik('bernLogit', y, m, v, bound); get fi at here
SGVector<float64_t> fi = obj_prt->lik->get_variational_expection();
TParameter* mu_param = obj_prt->lik->m_parameters->get_parameter("mu");
//[fi, gmi, gvi] = ElogLik('bernLogit', y, m, v, bound); get gmi at here
SGVector<float64_t> gmi =
obj_prt->lik->get_variational_first_derivative(mu_param);
SGVector<float64_t> g(dim);
Eigen::Map<Eigen::VectorXd> eigen_g(g.vector, g.vlen);
//e = m-mu;
//g = Omega*e;
eigen_g = obj_prt->ldlt.solve(eigen_m - eigen_mu);
//f = -e'*g/2 + sum(fi);
Eigen::VectorXd ff = -0.5*((eigen_m-eigen_mu).transpose()*eigen_g);
ASSERT(ff.size() == 1);
float64_t f = ff(0) + SGVector<float64_t>::sum(fi);
Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient, dim);
//get the gradient based on the current variable
Eigen::Map<Eigen::VectorXd> eigen_gmi(gmi.vector, gmi.vlen);
//g = -g + gmi;
//g = -g;
eigen_gradient = eigen_g - eigen_gmi;
//f = -f;
return -f;
}
void run(const char * x_file, const char * y_file, const char * bound_file,
const char * m_file, const char * loglik_file)
{
//N = 20; % number of data examples
index_t num_sample = 20;
//D = 5; % feature dimensionality
index_t num_dim = 5;
Shared obj;
//X = [5*rand(N/2,D); -5*rand(N/2,D)];
obj.data = create_feature(x_file, num_sample, num_dim);
//if we read from file
num_sample = obj.data.num_rows;
num_dim = obj.data.num_cols;
SG_SPRINT("num_samples:%d num_dimensions:%d\n", num_sample, num_dim);
//Sigma = X*X' + eye(N); % linear kernel
obj.sigma = SGMatrix<float64_t> (num_sample, num_sample);
Eigen::Map<Eigen::MatrixXd> eigen_data(obj.data.matrix, obj.data.num_rows,
obj.data.num_cols);
Eigen::Map<Eigen::MatrixXd> eigen_sigma(obj.sigma.matrix,
obj.sigma.num_rows, obj.sigma.num_cols);
//Sigma = X*X' + eye(N);
eigen_sigma = eigen_data * (eigen_data.transpose()) +
Eigen::MatrixXd::Identity(num_sample, num_sample);
//mu = zeros(N,1); % zero mean
obj.mu = SGVector<float64_t> (num_sample);
Eigen::Map<Eigen::VectorXd> eigen_mu(obj.mu.vector, obj.mu.vlen);
//mu = zeros(N,1); % zero mean
eigen_mu.fill(0);
//y = mvnrnd(mu, Sigma, 1);
//y = (y(:)>0);
obj.y = create_label(y_file, obj.mu, obj.sigma);
//% optimizers options
//optMinFunc = struct('Display', 1,...
//'Method', 'lbfgs',...
//'DerivativeCheck', 'off',...
//'LS', 2,...
//'MaxIter', 1000,...
//'MaxFunEvals', 1000,...
//'TolFun', 1e-4,......
//'TolX', 1e-4);
obj.lbfgs_param = inti_lbfgs_parameters();
//load('llp.mat');
obj.bound = init_piecewise_bound(bound_file);
obj.lik = new CLogitVGPiecewiseBoundLikelihood();
obj.lik->set_variational_bound(obj.bound);
//m0 = mu; % initial value all zero
obj.m0 = SGVector<float64_t> (num_sample);
obj.v = SGVector<float64_t> (num_sample);
Eigen::Map<Eigen::VectorXd> eigen_m0(obj.m0.vector, obj.m0.vlen);
//m0 = mu; % initial value
eigen_m0 = eigen_mu;
//v = ones(N,1); % fix v to 1
Eigen::Map<Eigen::VectorXd> eigen_v(obj.v, num_sample);
eigen_v.fill(1);
//Omega = inv(Sigma);
obj.ldlt.compute(eigen_sigma);
//sigma is positive definitive
ASSERT(obj.ldlt.isPositive());
float64_t logLik = 0.0;
//[m, logLik] = minFunc(@simpleVariational, m0, optMinFunc, y, X, mu, Omega, v, bound);
int ret = lbfgs(obj.m0.vlen, obj.m0.vector, &logLik,
evaluate, NULL, &obj, &obj.lbfgs_param);
SGVector<float64_t> m_from_matlab = load_m_from_matlab(m_file);
float64_t logLik_from_matlab = load_loglik_from_matlab(loglik_file);
ASSERT(m_from_matlab.vlen == num_sample);
SG_SPRINT("lbfgs status =%d\n",ret);
SG_SPRINT("logLik from Shogun =%.10f from Matlab =%.10f\n", logLik, logLik_from_matlab);
SG_SPRINT("opt m =\n");
for(index_t i = 0; i < obj.m0.vlen; ++i)
{
float64_t relative_diff;
if (m_from_matlab[i] != 0.0)
relative_diff = CMath::abs(obj.m0[i]/m_from_matlab[i] - 1);
else
relative_diff = CMath::abs(obj.m0[i]);
SG_SPRINT("m[%d] from Shogun =%.10f from Matlab = %.10f relative_diff = %.10f\n", i+1,
obj.m0[i], m_from_matlab[i], relative_diff);
}
SG_UNREF(obj.lik);
}
void test_datasets()
{
const index_t buff_size = 1024;
const char * data_path = "../data/variational";
char bound_path_buffer[buff_size];
char x_path_buffer[buff_size];
char y_path_buffer[buff_size];
char m_path_buffer[buff_size];
char loglik_path_buffer[buff_size];
snprintf(bound_path_buffer, buff_size, "%s/bounds", data_path);
FILE* pfile = fopen(bound_path_buffer, "r");
if (pfile == NULL)
{
SG_SPRINT("Unable to open file: %s\n", bound_path_buffer);
return;
}
fclose(pfile);
for (index_t i = 4; i <= 6; i++)
{
snprintf(x_path_buffer, buff_size, "%s/X_dataset%d", data_path, i);
snprintf(y_path_buffer, buff_size, "%s/y_dataset%d", data_path, i);
snprintf(m_path_buffer, buff_size, "%s/m_dataset%d", data_path, i);
snprintf(loglik_path_buffer, buff_size, "%s/logLik_dataset%d", data_path, i);
SG_SPRINT("\nDataset %d\n", i);
run(x_path_buffer, y_path_buffer, bound_path_buffer, m_path_buffer, loglik_path_buffer);
}
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
test_datasets();
exit_shogun();
return 0;
}