SHOGUN 4.2.0
This page lists ready-to-run Shogun examples for the C++ libshogun interface.

To run the examples, compile them manually via

g++ name_of_example.cpp -lshogun

If you installed libshogun to a non-standard directory, you will need to specify the appropriate include and library paths, e.g.

g++ -I/path/to/libshogun/includes name_of_example.cpp -L/path/to/libshogun/sofile -lshogun

The examples are then standard binary executables and can be started via

./name_of_example

or, if the libraries are in non-standard locations (such that the dynamic linker cannot find them), via

LD_LIBRARY_PATH=path/to/libshogun ./name_of_example
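For instance, assuming libshogun was installed under the prefix /usr/local (a hypothetical location; substitute your own prefix) and one of the examples below was saved as minimal_example.cpp, a complete compile-and-run session could look like:

g++ -I/usr/local/include minimal_example.cpp -L/usr/local/lib -lshogun -o minimal_example
LD_LIBRARY_PATH=/usr/local/lib ./minimal_example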
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the balanced conditional probability
 * tree for multiclass classification on streaming dense features.
 */

#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/tree/BalancedConditionalProbabilityTree.h>

using namespace shogun;

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    const char* train_file_name = "../data/7class_example4_train.dense";
    const char* test_file_name = "../data/7class_example4_test.dense";

    CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
    SG_REF(train_file);

    CStreamingDenseFeatures<float32_t>* train_features =
        new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
    SG_REF(train_features);

    CBalancedConditionalProbabilityTree *cpt = new CBalancedConditionalProbabilityTree();
    cpt->set_num_passes(1);
    cpt->set_features(train_features);

    if (argc > 1)
    {
        float64_t alpha = 0.5;
        sscanf(argv[1], "%lf", &alpha);
        SG_SPRINT("Setting alpha to %.2lf\n", alpha);
        cpt->set_alpha(alpha);
    }

    cpt->train();
    cpt->print_tree();

    CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
    SG_REF(test_file);
    CStreamingDenseFeatures<float32_t>* test_features =
        new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
    SG_REF(test_features);

    CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
    test_features->reset_stream();
    SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

    SG_UNREF(test_features);
    SG_UNREF(test_file);

    // re-open the test stream to read the ground-truth labels
    test_file = new CStreamingAsciiFile(test_file_name);
    SG_REF(test_file);
    test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
    SG_REF(test_features);

    CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
    SG_REF(gnd);
    test_features->start_parser();
    for (int32_t i=0; i < pred->get_num_labels(); ++i)
    {
        test_features->get_next_example();
        gnd->set_int_label(i, test_features->get_label());
        test_features->release_example();
    }
    test_features->end_parser();

    int32_t n_correct = 0;
    for (index_t i=0; i < pred->get_num_labels(); ++i)
    {
        if (pred->get_int_label(i) == gnd->get_int_label(i))
            n_correct++;
        //SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
    }
    SG_SPRINT("\n");
    SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels());

    SG_UNREF(gnd);
    SG_UNREF(train_features);
    SG_UNREF(test_features);
    SG_UNREF(train_file);
    SG_UNREF(test_file);
    SG_UNREF(cpt);
    SG_UNREF(pred);

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>

using namespace shogun;

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Viktor Gal
 */

#include <shogun/base/init.h>
#include <shogun/machine/BaggingMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/ensemble/MajorityVote.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>

using namespace shogun;

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    float64_t difference = 2.5;
    index_t dim = 2;
    index_t num_neg = 20;
    index_t num_pos = 20;
    int32_t num_bags = 5;
    int32_t bag_size = 25;

    /* streaming data generators for mean shift distributions */
    CMeanShiftDataGenerator* gen_n = new CMeanShiftDataGenerator(0, dim);
    CMeanShiftDataGenerator* gen_p = new CMeanShiftDataGenerator(difference, dim);

    CFeatures* neg = gen_n->get_streamed_features(num_neg);
    CFeatures* pos = gen_p->get_streamed_features(num_pos);
    CDenseFeatures<float64_t>* train_feats =
        CDenseFeatures<float64_t>::obtain_from_generic(neg->create_merged_copy(pos));

    SGVector<float64_t> tl(num_neg+num_pos);
    tl.set_const(1);
    for (index_t i = 0; i < num_neg; ++i)
        tl[i] = -1;
    CBinaryLabels* train_labels = new CBinaryLabels(tl);

    CBaggingMachine* bm = new CBaggingMachine(train_feats, train_labels);
    CLibLinear* ll = new CLibLinear();
    ll->set_bias_enabled(true);
    CMajorityVote* mv = new CMajorityVote();

    bm->set_num_bags(num_bags);
    bm->set_bag_size(bag_size);
    bm->set_machine(ll);
    bm->set_combination_rule(mv);

    bm->train();

    CBinaryLabels* pred_bagging = bm->apply_binary(train_feats);
    CContingencyTableEvaluation* eval = new CContingencyTableEvaluation();
    pred_bagging->get_int_labels().display_vector();
    float64_t bag_accuracy = eval->evaluate(pred_bagging, train_labels);
    float64_t oob_error = bm->get_oob_error(eval);

    CLibLinear* libLin = new CLibLinear(2.0, train_feats, train_labels);
    libLin->set_bias_enabled(true);
    libLin->train();
    CBinaryLabels* pred_liblin = libLin->apply_binary(train_feats);
    pred_liblin->get_int_labels().display_vector();
    float64_t liblin_accuracy = eval->evaluate(pred_liblin, train_labels);

    SG_SPRINT("bagging accuracy: %f (OOB-error: %f)\nLibLinear accuracy: %f\n",
        bag_accuracy, oob_error, liblin_accuracy);

    SG_UNREF(bm);
    SG_UNREF(pos);
    SG_UNREF(neg);
    SG_UNREF(eval);

    exit_shogun();
    return 0;
}
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN

#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/IndexBlock.h>
#include <shogun/lib/IndexBlockTree.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    // create some data
    SGMatrix<float64_t> matrix(4,4);
    for (int32_t i=0; i<4*4; i++)
        matrix.matrix[i]=i;

    CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);

    // create four labels
    CBinaryLabels* labels=new CBinaryLabels(4);
    labels->set_label(0, -1);
    labels->set_label(1, +1);
    labels->set_label(2, -1);
    labels->set_label(3, +1);

    CIndexBlock* first_block = new CIndexBlock(0,2);
    CIndexBlock* second_block = new CIndexBlock(2,4);
    CIndexBlockGroup* block_group = new CIndexBlockGroup();
    block_group->add_block(first_block);
    block_group->add_block(second_block);

    CFeatureBlockLogisticRegression* regressor =
        new CFeatureBlockLogisticRegression(0.5,features,labels,block_group);
    regressor->train();
    regressor->get_w().display_vector();

    CIndexBlock* root_block = new CIndexBlock(0,4);
    root_block->add_sub_block(first_block);
    root_block->add_sub_block(second_block);
    CIndexBlockTree* block_tree = new CIndexBlockTree(root_block);

    regressor->set_feature_relation(block_tree);
    regressor->train();
    regressor->get_w().display_vector();

    SG_UNREF(regressor);
    exit_shogun();
    return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Roman Votyakov
 */

#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN

#include <shogun/base/init.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/machine/gp/SingleLaplacianInferenceMethod.h>
#include <shogun/machine/gp/EPInferenceMethod.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/LogitLikelihood.h>
#include <shogun/machine/gp/ProbitLikelihood.h>
#include <shogun/classifier/GaussianProcessClassification.h>
#include <shogun/io/CSVFile.h>

using namespace shogun;

// files with training data
const char* fname_feat_train="../data/fm_train_real.dat";
const char* fname_label_train="../data/label_train_twoclass.dat";

// file with testing data
const char* fname_feat_test="../data/fm_test_real.dat";

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    // training data
    SGMatrix<float64_t> X_train;
    SGVector<float64_t> y_train;

    // load training features from file
    CCSVFile* file_feat_train=new CCSVFile(fname_feat_train);
    X_train.load(file_feat_train);
    SG_UNREF(file_feat_train);

    // load training labels from file
    CCSVFile* file_label_train=new CCSVFile(fname_label_train);
    y_train.load(file_label_train);
    SG_UNREF(file_label_train);

    // testing features
    SGMatrix<float64_t> X_test;

    // load testing features from file
    CCSVFile* file_feat_test=new CCSVFile(fname_feat_test);
    X_test.load(file_feat_test);
    SG_UNREF(file_feat_test);

    // convert training and testing data into shogun representation
    CDenseFeatures<float64_t>* feat_train=new CDenseFeatures<float64_t>(X_train);
    CBinaryLabels* lab_train=new CBinaryLabels(y_train);
    CDenseFeatures<float64_t>* feat_test=new CDenseFeatures<float64_t>(X_test);
    SG_REF(feat_test);

    // create Gaussian kernel with width = 2.0
    CGaussianKernel* kernel=new CGaussianKernel(10, 2.0);

    // create zero mean function
    CZeroMean* mean=new CZeroMean();

    // you can easily switch between probit and logit likelihood models
    // by uncommenting/commenting the following lines:

    // create probit likelihood model
    // CProbitLikelihood* lik=new CProbitLikelihood();

    // create logit likelihood model
    CLogitLikelihood* lik=new CLogitLikelihood();

    // you can easily switch between SingleLaplace and EP approximation by
    // uncommenting/commenting the following lines:

    // specify SingleLaplace approximation inference method
    // CSingleLaplacianInferenceMethod* inf=new CSingleLaplacianInferenceMethod(kernel,
    //     feat_train, mean, lab_train, lik);

    // specify EP approximation inference method
    CEPInferenceMethod* inf=new CEPInferenceMethod(kernel, feat_train, mean,
        lab_train, lik);

    // create and train GP classifier, which uses the chosen approximation
    CGaussianProcessClassification* gpc=new CGaussianProcessClassification(inf);
    gpc->train();

    // apply binary classification to the test data and get -1/+1
    // labels of the predictions
    CBinaryLabels* predictions=gpc->apply_binary(feat_test);
    predictions->get_labels().display_vector("predictions");

    // get probabilities p(y*=1|x*) for each testing feature x*
    SGVector<float64_t> p_test=gpc->get_probabilities(feat_test);
    p_test.display_vector("predictive probability");

    // get predictive mean
    SGVector<float64_t> mu_test=gpc->get_mean_vector(feat_test);
    mu_test.display_vector("predictive mean");

    // get predictive variance
    SGVector<float64_t> s2_test=gpc->get_variance_vector(feat_test);
    s2_test.display_vector("predictive variance");

    // free up memory
    SG_UNREF(gpc);
    SG_UNREF(predictions);
    SG_UNREF(feat_test);

    exit_shogun();
    return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Heiko Strathmann and others
 */

#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/LaRank.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/init.h>

using namespace shogun;

void test()
{
    index_t num_vec=10;
    index_t num_feat=3;
    index_t num_class=num_feat; // to make data easy
    float64_t distance=15;

    // create some linearly separable data
    SGMatrix<float64_t> matrix(num_class, num_vec);
    SGMatrix<float64_t> matrix_test(num_class, num_vec);
    CMulticlassLabels* labels=new CMulticlassLabels(num_vec);
    CMulticlassLabels* labels_test=new CMulticlassLabels(num_vec);
    for (index_t i=0; i<num_vec; ++i)
    {
        index_t label=i%num_class;
        for (index_t j=0; j<num_feat; ++j)
        {
            matrix(j,i)=CMath::randn_double();
            matrix_test(j,i)=CMath::randn_double();
            labels->set_label(i, label);
            labels_test->set_label(i, label);
        }

        /* make sure data is linearly separable per class */
        matrix(label,i)+=distance;
        matrix_test(label,i)+=distance;
    }
    matrix.display_matrix("matrix");
    labels->get_int_labels().display_vector("labels");

    // shogun will now own the matrix created
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
    CDenseFeatures<float64_t>* features_test=
        new CDenseFeatures<float64_t>(matrix_test);

    // set the labels once more (redundant, kept from the original example)
    for (index_t i=0; i<num_vec; ++i)
        labels->set_label(i, i%num_class);

    // create gaussian kernel with cache 10MB, width 0.5
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(features, features);

    // create LaRank machine with C=10 and train (twice, which also
    // exercises re-training)
    CLaRank* svm = new CLaRank(10, kernel, labels);
    svm->train();
    svm->train();

    // classify on training examples
    CMulticlassLabels* output=(CMulticlassLabels*)svm->apply();
    output->get_labels().display_vector("batch output");

    /* assert that batch apply and apply(index_t) give same result */
    SGVector<float64_t> single_outputs(output->get_num_labels());
    for (index_t i=0; i<output->get_num_labels(); ++i)
        single_outputs[i]=svm->apply_one(i);

    single_outputs.display_vector("single_outputs");

    for (index_t i=0; i<output->get_num_labels(); ++i)
        ASSERT(output->get_label(i)==single_outputs[i]);

    CMulticlassLabels* output_test=
        (CMulticlassLabels*)svm->apply(features_test);
    labels_test->get_labels().display_vector("labels_test");
    output_test->get_labels().display_vector("output_test");

    for (index_t i=0; i<output->get_num_labels(); ++i)
        ASSERT(labels_test->get_label(i)==output_test->get_label(i));

    // free up memory
    SG_UNREF(output);
    SG_UNREF(labels_test);
    SG_UNREF(output_test);
    SG_UNREF(svm);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    test();
    exit_shogun();
    return 0;
}
#include <shogun/labels/LatentLabels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/LatentFeatures.h>
#include <shogun/latent/LatentSVM.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>

#include <libgen.h>
#include <cstring>

using namespace shogun;

#define MAX_LINE_LENGTH 4096
#define HOG_SIZE 1488

struct CBoundingBox : public CData
{
    CBoundingBox(int32_t x, int32_t y) : CData(), x_pos(x), y_pos(y) {};

    int32_t x_pos, y_pos;

    /** @return name of SGSerializable */
    virtual const char* get_name() const { return "BoundingBox"; }
};

struct CHOGFeatures : public CData
{
    CHOGFeatures(int32_t w, int32_t h) : CData(), width(w), height(h) {};

    int32_t width, height;
    float64_t ***hog;

    /** @return name of SGSerializable */
    virtual const char* get_name() const { return "HOGFeatures"; }
};

class CObjectDetector: public CLatentModel
{
    public:
        CObjectDetector() {}
        CObjectDetector(CLatentFeatures* feat, CLatentLabels* labels)
            : CLatentModel(feat, labels) {}

        virtual ~CObjectDetector() {}

        virtual int32_t get_dim() const { return HOG_SIZE; }

        virtual CDotFeatures* get_psi_feature_vectors()
        {
            int32_t num_examples = this->get_num_vectors();
            int32_t dim = this->get_dim();
            SGMatrix<float64_t> psi_m(dim, num_examples);
            for (int32_t i = 0; i < num_examples; ++i)
            {
                CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(i);
                CBoundingBox* bb = (CBoundingBox*) m_labels->get_latent_label(i);
                memcpy(psi_m.matrix+i*dim, hf->hog[bb->x_pos][bb->y_pos],
                    dim*sizeof(float64_t));
            }
            CDenseFeatures<float64_t>* psi_feats = new CDenseFeatures<float64_t>(psi_m);
            return psi_feats;
        }

        virtual CData* infer_latent_variable(const SGVector<float64_t>& w, index_t idx)
        {
            int32_t pos_x = 0, pos_y = 0;
            float64_t max_score = -CMath::INFTY;

            CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(idx);
            for (int i = 0; i < hf->width; ++i)
            {
                for (int j = 0; j < hf->height; ++j)
                {
                    float64_t score = CMath::dot(w.vector, hf->hog[i][j], w.vlen);
                    if (score > max_score)
                    {
                        pos_x = i;
                        pos_y = j;
                        max_score = score;
                    }
                }
            }
            SG_SDEBUG("%d %d %f\n", pos_x, pos_y, max_score);

            CBoundingBox* h = new CBoundingBox(pos_x, pos_y);
            SG_REF(h);

            return h;
        }
};

static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& labels)
{
    FILE* fd = fopen(fname, "r");
    char line[MAX_LINE_LENGTH];
    char *pchar, *last_pchar;
    int num_examples, label, height, width;

    char* path = dirname(fname);

    if (fd == NULL)
        SG_SERROR("Cannot open input file %s!\n", fname);

    fgets(line, MAX_LINE_LENGTH, fd);
    num_examples = atoi(line);

    labels = new CLatentLabels(num_examples);
    SG_REF(labels);

    CBinaryLabels* ys = new CBinaryLabels(num_examples);

    feats = new CLatentFeatures(num_examples);
    SG_REF(feats);

    CMath::init_random();
    for (int i = 0; (!feof(fd)) && (i < num_examples); ++i)
    {
        fgets(line, MAX_LINE_LENGTH, fd);

        pchar = line;
        while ((*pchar)!=' ') pchar++;
        *pchar = '\0';
        pchar++;

        /* label: {-1, 1} */
        last_pchar = pchar;
        while ((*pchar)!=' ') pchar++;
        *pchar = '\0';
        label = (atoi(last_pchar) % 2 == 0) ? 1 : -1;
        pchar++;

        if (ys->set_label(i, label) == false)
            SG_SERROR("Couldn't set label for element %d\n", i);

        last_pchar = pchar;
        while ((*pchar)!=' ') pchar++;
        *pchar = '\0';
        width = atoi(last_pchar);
        pchar++;

        last_pchar = pchar;
        while ((*pchar)!='\n') pchar++;
        *pchar = '\0';
        height = atoi(last_pchar);

        /* create latent label */
        int x = CMath::random(0, width-1);
        int y = CMath::random(0, height-1);
        CBoundingBox* bb = new CBoundingBox(x,y);
        labels->add_latent_label(bb);

        SG_SPROGRESS(i, 0, num_examples);
        CHOGFeatures* hog = new CHOGFeatures(width, height);
        hog->hog = SG_CALLOC(float64_t**, hog->width);
        for (int j = 0; j < width; ++j)
        {
            hog->hog[j] = SG_CALLOC(float64_t*, hog->height);
            for (int k = 0; k < height; ++k)
            {
                char filename[MAX_LINE_LENGTH];
                hog->hog[j][k] = SG_CALLOC(float64_t, HOG_SIZE);

                sprintf(filename, "%s/%s.%03d.%03d.txt", path, line, j, k);
                FILE* f = fopen(filename, "r");
                if (f == NULL)
                    SG_SERROR("Could not open file: %s\n", filename);
                for (int l = 0; l < HOG_SIZE; ++l)
                    fscanf(f, "%lf", &hog->hog[j][k][l]);
                fclose(f);
            }
        }
        feats->add_sample(hog);
    }
    fclose(fd);

    labels->set_labels(ys);

    SG_SDONE();
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    sg_io->set_loglevel(MSG_DEBUG);

    /* check whether the train/test args are given */
    if (argc < 3)
    {
        SG_SERROR("not enough arguments given\n");
    }

    CLatentFeatures* train_feats = NULL;
    CLatentLabels* train_labels = NULL;
    /* read train data set */
    read_dataset(argv[1], train_feats, train_labels);

    /* train the classifier */
    float64_t C = 10.0;
    CObjectDetector* od = new CObjectDetector(train_feats, train_labels);
    CLatentSVM llm(od, C);
    llm.train();

    // CLatentFeatures* test_feats = NULL;
    // CLatentLabels* test_labels = NULL;
    // read_dataset(argv[2], test_feats, test_labels);

    SG_SPRINT("Testing with the test set\n");
    llm.apply(train_feats);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Kevin Hughes
 * Copyright (C) 2013 Kevin Hughes
 *
 * Thanks to Fernando Jose Iglesias Garcia (shogun)
 * and Matthieu Perrot (scikit-learn)
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/multiclass/MCLDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/features/DataGenerator.h>

using namespace shogun;

#define NUM 50
#define DIMS 2
#define CLASSES 2

void test()
{
#ifdef HAVE_LAPACK
    SGVector< float64_t > lab(CLASSES*NUM);
    SGMatrix< float64_t > feat(DIMS, CLASSES*NUM);

    feat = CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS);
    for( int i = 0 ; i < CLASSES ; ++i )
        for( int j = 0 ; j < NUM ; ++j )
            lab[i*NUM+j] = double(i);

    // Create train labels
    CMulticlassLabels* labels = new CMulticlassLabels(lab);

    // Create train features
    CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feat);

    // Create MCLDA classifier
    CMCLDA* lda = new CMCLDA(features, labels);
    SG_REF(lda);
    lda->train();

    // Classify and display output
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(lda->apply());
    SG_REF(output);
    SGVector<float64_t>::display_vector(output->get_labels().vector,
        output->get_num_labels());

    // Free memory
    SG_UNREF(output);
    SG_UNREF(lda);
#endif // HAVE_LAPACK
}

int main(int argc, char ** argv)
{
    init_shogun_with_defaults();
    test();
    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/mathematics/Math.h>

#include <iostream>

using namespace shogun;

// generates data points (of different classes) randomly
void gen_rand_data(SGMatrix<float64_t> features, SGVector<float64_t> labels,
    float64_t distance)
{
    index_t num_samples=labels.vlen;
    index_t dimensions=features.num_rows;
    for (int32_t i=0; i<num_samples; i++)
    {
        if (i<num_samples/2)
        {
            labels[i]=-1.0;
            for (int32_t j=0; j<dimensions; j++)
                features(j,i)=CMath::random(0.0,1.0)+distance;
        }
        else
        {
            labels[i]=1.0;
            for (int32_t j=0; j<dimensions; j++)
                features(j,i)=CMath::random(0.0,1.0)-distance;
        }
    }
    labels.display_vector("labels");
    std::cout<<std::endl;
    features.display_matrix("features");
    std::cout<<std::endl;
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    const float64_t svm_C=10;
    index_t num_samples=20;
    index_t dimensions=2;
    float64_t dist=0.5;

    SGMatrix<float64_t> featureMatrix(dimensions,num_samples);
    SGVector<float64_t> labelVector(num_samples);

    // random generation of data
    gen_rand_data(featureMatrix,labelVector,dist);

    // create train labels
    CLabels* labels=new CBinaryLabels(labelVector);

    // create train features
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    SG_REF(features);
    features->set_feature_matrix(featureMatrix);

    // create linear kernel
    CLinearKernel* kernel=new CLinearKernel();
    SG_REF(kernel);
    kernel->init(features, features);

    // create svm classifier via LibSVM
    CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
    SG_REF(svm);
    svm->train();

    // classify data points
    CBinaryLabels* out_labels=CLabelsFactory::to_binary(svm->apply());

    /* convert scores to calibrated probabilities by fitting a sigmoid
     * function, using the method described in
     * Lin, H., Lin, C., and Weng, R. (2007).
     * A note on Platt's probabilistic outputs for support vector machines.
     * See the BinaryLabels documentation for details. */
    out_labels->scores_to_probabilities();

    // display output labels and probabilities
    for (int32_t i=0; i<num_samples; i++)
    {
        SG_SPRINT("out[%d]=%f (%f)\n", i, out_labels->get_label(i),
            out_labels->get_value(i));
    }

    // clean up
    SG_UNREF(out_labels);
    SG_UNREF(kernel);
    SG_UNREF(features);
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
    init_shogun(&print_message);

    // create some data
    SGMatrix<float64_t> matrix(2,3);
    for (int32_t i=0; i<6; i++)
        matrix.matrix[i]=i;

    // create three 2-dimensional vectors
    // shogun will now own the matrix created
    CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    // create three labels
    CBinaryLabels* labels=new CBinaryLabels(3);
    labels->set_label(0, -1);
    labels->set_label(1, +1);
    labels->set_label(2, -1);

    // create gaussian kernel with cache 10MB, width 0.5
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(features, features);

    // create libsvm with C=10 and train
    CLibSVM* svm = new CLibSVM(10, kernel, labels);
    svm->train();

    // classify on training examples
    for (int32_t i=0; i<3; i++)
        SG_SPRINT("output[%d]=%f\n", i, svm->apply_one(i));

    // free up memory
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2013 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
    root->append_child(c1);
    c1->build_values(-1.0, 1.0, R_EXP);

    CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
    root->append_child(c2);
    c2->build_values(-1.0, 1.0, R_EXP);

    CCombinedKernel* kernel1=new CCombinedKernel();
    kernel1->append_kernel(new CGaussianKernel(10, 2));
    kernel1->append_kernel(new CGaussianKernel(10, 3));
    kernel1->append_kernel(new CGaussianKernel(10, 4));

    CModelSelectionParameters* param_kernel1=new CModelSelectionParameters(
        "kernel", kernel1);
    root->append_child(param_kernel1);

    CCombinedKernel* kernel2=new CCombinedKernel();
    kernel2->append_kernel(new CGaussianKernel(10, 20));
    kernel2->append_kernel(new CGaussianKernel(10, 30));
    kernel2->append_kernel(new CGaussianKernel(10, 40));

    CModelSelectionParameters* param_kernel2=new CModelSelectionParameters(
        "kernel", kernel2);
    root->append_child(param_kernel2);

    return root;
}

/** Demonstrates the MKL model selection bug with SVMLight.
 * See the comments below for how to reproduce it. */
void test()
{
    int32_t num_subsets=3;
    int32_t num_vectors=20;
    int32_t dim_vectors=3;

    /* create some data and labels */
    SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
    CBinaryLabels* labels=new CBinaryLabels(num_vectors);
    for (int32_t i=0; i<num_vectors*dim_vectors; i++)
        matrix.matrix[i]=CMath::randn_double();

    /* create num_vectors vectors, each of dim_vectors dimensions */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    /* create combined features */
    CCombinedFeatures* comb_features=new CCombinedFeatures();
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);

    /* create labels, two classes */
    for (index_t i=0; i<num_vectors; ++i)
        labels->set_label(i, i%2==0 ? 1 : -1);

    /* works */
    CMKLClassification* classifier=new CMKLClassification(new CLibSVM());
    classifier->set_interleaved_optimization_enabled(false);

    /* the above plus this does not work (interleaved only with SVMLight) */
    // classifier->set_interleaved_optimization_enabled(true);

    /* However, SVMLight does not work */
    // CMKLClassification* classifier=new CMKLClassification(new CSVMLight());
    // /* any of those */
    // classifier->set_interleaved_optimization_enabled(false);
    // classifier->set_interleaved_optimization_enabled(true);

    /* splitting strategy */
    CStratifiedCrossValidationSplitting* splitting_strategy=
        new CStratifiedCrossValidationSplitting(labels, num_subsets);

    /* accuracy evaluation */
    CContingencyTableEvaluation* evaluation_criterium=
        new CContingencyTableEvaluation(ACCURACY);

    /* cross validation class for evaluation in model selection */
    CCrossValidation* cross=new CCrossValidation(classifier, comb_features,
        labels, splitting_strategy, evaluation_criterium);
    cross->set_num_runs(1);
    /* TODO: remove this once locking is fixed for combined kernels */
    cross->set_autolock(false);

    /* print all parameters available for model selection.
     * Don't worry if yours is not included, simply write to the mailing list */
    classifier->print_modsel_params();

    /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

    /* handles all of the above structures in memory */
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
        cross, param_tree);

    bool print_state=true;
    CParameterCombination* best_combination=grid_search->select_model(
        print_state);
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

    best_combination->apply_to_machine(classifier);

    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    // cross->set_conf_int_alpha(0.01);
    CEvaluationResult* result=cross->evaluate();
    SG_SPRINT("result: ");
    result->print_result();

    /* clean up, destroy result parameter */
    SG_UNREF(best_combination);
    SG_UNREF(grid_search);
    SG_UNREF(result);
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();
    sg_io->set_loglevel(MSG_DEBUG);

    test();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Alexander Binder
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/ShogunException.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/classifier/mkl/MKLMulticlass.h>

#include <cmath>
#include <cstdlib>
#include <iostream>
#include <vector>

// compile standalone via, e.g.:
// g++ -Wall -O3 classifier_mklmulticlass.cpp -I/path/to/shogun/include -L/path/to/shogun/lib -lshogun

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void print_warning(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void print_error(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void getgauss(float64_t & y1, float64_t & y2)
{
    float x1, x2, w;

    do {
        x1 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0;
        x2 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0;
        w = x1 * x1 + x2 * x2;
    } while ( (w >= 1.0) || (w < 1e-9) );

    w = sqrt( (-2.0 * log( w ) ) / w );
    y1 = x1 * w;
    y2 = x2 * w;
}

void gendata(std::vector<float64_t> & x, std::vector<float64_t> & y,
    CMulticlassLabels*& lab)
{
    int32_t totalsize=240;
    int32_t class1size=80;
    int32_t class2size=70;

    // generating a three-class data set
    x.resize(totalsize);
    y.resize(totalsize);
    for(size_t i=0; i< x.size(); ++i)
        getgauss(x[i], y[i]);

    for(size_t i=0; i< x.size(); ++i)
    {
        if((int32_t)i < class1size)
        {
            x[i]+=0;
            y[i]+=0;
        }
        else if( (int32_t)i < class1size+class2size)
        {
            x[i]+=+1;
            y[i]+=-1;
        }
        else
        {
            x[i]+=-1;
            y[i]+=+1;
        }
    }

    // set labels
    lab=new CMulticlassLabels(x.size());
    for(size_t i=0; i< x.size(); ++i)
    {
        if((int32_t)i < class1size)
            lab->set_int_label(i,0);
        else if( (int32_t)i < class1size+class2size)
            lab->set_int_label(i,1);
        else
            lab->set_int_label(i,2);
    }
}

void gentrainkernel(float64_t * & ker1, float64_t * & ker2, float64_t * & ker3,
    float64_t & autosigma, float64_t & n1, float64_t & n2, float64_t & n3,
    const std::vector<float64_t> & x, const std::vector<float64_t> & y)
{
    autosigma=0;

    for(size_t l=0; l< x.size(); ++l)
    {
        for(size_t r=0; r<= l; ++r)
        {
            float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r]));
            autosigma+=dist*2.0/(float64_t)x.size()/((float64_t)x.size()+1);
        }
    }

    float64_t fm1=0, mean1=0, fm2=0, mean2=0, fm3=0, mean3=0;

    ker1=SG_MALLOC(float64_t, x.size()*x.size());
    ker2=SG_MALLOC(float64_t, x.size()*x.size());
    ker3=SG_MALLOC(float64_t, x.size()*x.size());

    for(size_t l=0; l< x.size(); ++l)
    {
        for(size_t r=0; r< x.size(); ++r)
        {
            float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r]));

            ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma);
            //ker2[l +r*x.size()]= exp( -dist/sigma2/sigma2);
            ker2[l +r*x.size()]= x[l]*x[r] + y[l]*y[r];
            ker3[l +r*x.size()]= (x[l]*x[r] + y[l]*y[r]+1)*(x[l]*x[r] + y[l]*y[r]+1);

            fm1+=ker1[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());
            fm2+=ker2[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());
            fm3+=ker3[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size());

            if(l==r)
            {
                mean1+=ker1[l +r*x.size()]/(float64_t)x.size();
                mean2+=ker2[l +r*x.size()]/(float64_t)x.size();
                mean3+=ker3[l +r*x.size()]/(float64_t)x.size();
            }
        }
    }

    n1=(mean1-fm1);
    n2=(mean2-fm2);
    n3=(mean3-fm3);

    for(size_t l=0; l< x.size(); ++l)
    {
        for(size_t r=0; r< x.size(); ++r)
        {
            ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1;
            ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2;
            ker3[l +r*x.size()]=ker3[l +r*x.size()]/n3;
        }
    }
}

void gentestkernel(float64_t * & ker1, float64_t * & ker2, float64_t * & ker3,
    const float64_t autosigma, const float64_t n1, const float64_t n2,
    const float64_t n3,
    const std::vector<float64_t> & x, const std::vector<float64_t> & y,
    const std::vector<float64_t> & tx, const std::vector<float64_t> & ty)
{
    ker1=SG_MALLOC(float64_t, x.size()*tx.size());
    ker2=SG_MALLOC(float64_t, x.size()*tx.size());
    ker3=SG_MALLOC(float64_t, x.size()*tx.size());

    for(size_t l=0; l< x.size(); ++l)
    {
        for(size_t r=0; r< tx.size(); ++r)
        {
            float64_t dist=((x[l]-tx[r])*(x[l]-tx[r]) + (y[l]-ty[r])*(y[l]-ty[r]));

            ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma);
            ker2[l +r*x.size()]= x[l]*tx[r] + y[l]*ty[r];
            ker3[l +r*x.size()]= (x[l]*tx[r] + y[l]*ty[r]+1)*(x[l]*tx[r] + y[l]*ty[r]+1);
        }
    }

    for(size_t l=0; l< x.size(); ++l)
    {
        for(size_t r=0; r< tx.size(); ++r)
        {
            ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1;
            ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2;
            // fixed: the original erroneously divided ker3 by n2
            ker3[l +r*x.size()]=ker3[l +r*x.size()]/n3;
        }
    }
}

void tester()
{
    CMulticlassLabels* lab=NULL;
    std::vector<float64_t> x,y;
    gendata(x,y, lab);
    SG_REF(lab);

    float64_t* ker1=NULL;
    float64_t* ker2=NULL;
    float64_t* ker3=NULL;
    float64_t autosigma=1;
    float64_t n1=0;
    float64_t n2=0;
    float64_t n3=0;

    int32_t numdata=0;
    gentrainkernel(ker1, ker2, ker3, autosigma, n1, n2, n3, x, y);
    numdata=x.size();

    CCombinedKernel* ker=new CCombinedKernel();

    CCustomKernel* kernel1=new CCustomKernel();
    CCustomKernel* kernel2=new CCustomKernel();
    CCustomKernel* kernel3=new CCustomKernel();

    kernel1->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(ker1, numdata, numdata, false));
    kernel2->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(ker2, numdata, numdata, false));
    kernel3->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(ker3, numdata, numdata, false));

    SG_FREE(ker1);
    SG_FREE(ker2);
    SG_FREE(ker3);

    ker->append_kernel(kernel1);
    ker->append_kernel(kernel2);
    ker->append_kernel(kernel3);

    // here comes the core stuff
    float64_t regconst=1.0;

    CMKLMulticlass* tsvm=new CMKLMulticlass(regconst, ker, lab);

    tsvm->set_epsilon(0.0001); // SVM epsilon
    // MKL parameters
    tsvm->set_mkl_epsilon(0.01); // subkernel weight L2 norm termination criterion
    tsvm->set_max_num_mkliters(120); // well, it will be just three iterations
    tsvm->set_mkl_norm(1.5); // mkl norm

    // start svm training
    tsvm->train();

    SG_SPRINT("finished svm training\n");

    // start svm testing on training data
    CMulticlassLabels* res=CLabelsFactory::to_multiclass(tsvm->apply());
    ASSERT(res);

    float64_t err=0;
    for(int32_t i=0; i<numdata; ++i)
    {
        ASSERT(i< res->get_num_labels());
        if (lab->get_int_label(i)!=res->get_int_label(i))
            err+=1;
    }

    err/=(float64_t)res->get_num_labels();
    SG_SPRINT("prediction error on training data (3 classes): %f ", err);
    SG_SPRINT("random guess error would be: %f \n", 2/3.0);

    // generate test data
    CMulticlassLabels* tlab=NULL;

    std::vector<float64_t> tx,ty;
    gendata(tx,ty, tlab);
    SG_REF(tlab);

    float64_t* tker1=NULL;
    float64_t* tker2=NULL;
    float64_t* tker3=NULL;

    gentestkernel(tker1,tker2,tker3, autosigma, n1,n2,n3, x,y, tx,ty);
    int32_t numdatatest=tx.size();

    CCombinedKernel* tker=new CCombinedKernel();
    SG_REF(tker);

    CCustomKernel* tkernel1=new CCustomKernel();
    CCustomKernel* tkernel2=new CCustomKernel();
    CCustomKernel* tkernel3=new CCustomKernel();

    tkernel1->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(tker1, numdata, numdatatest, false));
    tkernel2->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(tker2, numdata, numdatatest, false));
    // fixed: the original erroneously passed tker2 here as well
    tkernel3->set_full_kernel_matrix_from_full(
        SGMatrix<float64_t>(tker3, numdata, numdatatest, false));

    SG_FREE(tker1);
    SG_FREE(tker2);
    SG_FREE(tker3);

    tker->append_kernel(tkernel1);
    tker->append_kernel(tkernel2);
    tker->append_kernel(tkernel3);

    int32_t numweights;
    float64_t* weights=tsvm->getsubkernelweights(numweights);

    SG_SPRINT("test kernel weights\n");
    for(int32_t i=0; i< numweights; ++i)
        SG_SPRINT("%f ", weights[i]);
    SG_SPRINT("\n");

    // set kernel
    tker->set_subkernel_weights(SGVector<float64_t>(weights, numweights));
    tsvm->set_kernel(tker);

    // compute classification error, check mem
    CMulticlassLabels* tres=CLabelsFactory::to_multiclass(tsvm->apply());

    float64_t terr=0;
    for(int32_t i=0; i<numdatatest; ++i)
    {
        ASSERT(i< tres->get_num_labels());
        if(tlab->get_int_label(i)!=tres->get_int_label(i))
            terr+=1;
    }

    terr/=(float64_t)tres->get_num_labels();
    SG_SPRINT("prediction error on test data (3 classes): %f ", terr);
    SG_SPRINT("random guess error would be: %f \n", 2/3.0);

    SG_UNREF(tsvm);
    SG_UNREF(res);
    SG_UNREF(tres);
    SG_UNREF(lab);
    SG_UNREF(tlab);
    SG_UNREF(tker);

    SG_SPRINT("finished\n");
}

namespace shogun
{
    extern Version* sg_version;
    extern SGIO* sg_io;
}

int main()
{
    init_shogun(&print_message, &print_warning, &print_error);

    try
    {
        sg_version->print_version();
        sg_io->set_loglevel(MSG_INFO);
        tester();
    }
    catch(ShogunException & sh)
    {
        printf("%s", sh.get_exception_string());
    }

    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

// Training data
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";

void test()
{
    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create liblinear svm classifier with L2-regularized L2-loss
    CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
    SG_REF(svm);

    // Add some configuration to the svm
    svm->set_epsilon(EPSILON);
    svm->set_bias_enabled(true);

    // Create a multiclass svm classifier that consists of several of the previous one
    CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
        new CECOCStrategy(new CECOCOVREncoder(), new CECOCHDDecoder()),
        (CDotFeatures*) features, svm, labels);
    SG_REF(mc_svm);

    // Train the multiclass machine using the data passed in the constructor
    mc_svm->train();

    // Classify the training examples and show the results
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
    SGVector< int32_t > out_labels = output->get_int_labels();
    SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

    // Free resources
    SG_UNREF(mc_svm);
    SG_UNREF(svm);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    test();
    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";

void test()
{
    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create liblinear svm classifier with L2-regularized L2-loss
    CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
    SG_REF(svm);

    // Add some configuration to the svm
    svm->set_epsilon(EPSILON);
    svm->set_bias_enabled(true);

    CECOCDiscriminantEncoder *encoder = new CECOCDiscriminantEncoder();
    encoder->set_features(features);
    encoder->set_labels(labels);

    // Create a multiclass svm classifier that consists of several of the previous one
    CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
        new CECOCStrategy(encoder, new CECOCHDDecoder()),
        (CDotFeatures*) features, svm, labels);
    SG_REF(mc_svm);

    // Train the multiclass machine using the data passed in the constructor
    mc_svm->train();

    // Classify the training examples and show the results
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
    SGVector< int32_t > out_labels = output->get_int_labels();
    SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

    // Free resources
    SG_UNREF(mc_svm);
    SG_UNREF(svm);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    test();
    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";

void test()
{
    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create liblinear svm classifier with L2-regularized L2-loss
    CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
    SG_REF(svm);

    // Add some configuration to the svm
    svm->set_epsilon(EPSILON);
    svm->set_bias_enabled(true);

    // Create a multiclass svm classifier that consists of several of the previous one
    CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
        new CECOCStrategy(new CECOCRandomDenseEncoder(), new CECOCHDDecoder()),
        (CDotFeatures*) features, svm, labels);
    SG_REF(mc_svm);

    // Train the multiclass machine using the data passed in the constructor
    mc_svm->train();

    // Classify the training examples and show the results
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
    SGVector< int32_t > out_labels = output->get_int_labels();
    SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

    // Free resources
    SG_UNREF(mc_svm);
    SG_UNREF(svm);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    // sg_io->set_loglevel(MSG_DEBUG);
    test();
    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/MulticlassStrategy.h>
#include <shogun/multiclass/MulticlassOneVsOneStrategy.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";

void test()
{
    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create liblinear svm classifier with L2-regularized L2-loss
    CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
    SG_REF(svm);

    // Add some configuration to the svm
    svm->set_epsilon(EPSILON);
    svm->set_bias_enabled(true);

    // Create a multiclass svm classifier that consists of several of the previous one.
    // Several heuristics are implemented:
    //   OVA_NORM, OVA_SOFTMAX
    //   OVO_PRICE, OVO_HASTIE, OVO_HAMAMURA
    CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
        new CMulticlassOneVsOneStrategy(OVO_HASTIE),
        (CDotFeatures*) features, svm, labels);
    SG_REF(mc_svm);

    // Train the multiclass machine using the data passed in the constructor
    mc_svm->train();

    // Classify the training examples and show the results
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
    SGVector< int32_t > out_labels = output->get_int_labels();
    SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);

    for (int32_t i=0; i<output->get_num_labels(); i++)
    {
        SG_SPRINT("out_values[%d] = ", i);
        SGVector<float64_t> out_values = output->get_multiclass_confidences(i);
        SGVector<float64_t>::display_vector(out_values.vector, out_values.vlen);
        SG_SPRINT("\n");
    }

    // Free resources
    SG_UNREF(mc_svm);
    SG_UNREF(svm);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    //sg_io->set_loglevel(MSG_DEBUG);
    test();
    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DenseSubsetFeatures.h>
#include <shogun/base/init.h>
#include <shogun/multiclass/tree/RelaxedTree.h>
#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/kernel/GaussianKernel.h>

#define EPSILON 1e-5

using namespace shogun;

const char* fname_feats = "../data/7class_example4_train.dense";
const char* fname_labels = "../data/7class_example4_train.label";

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create RelaxedTree Machine
    CRelaxedTree *machine = new CRelaxedTree();
    SG_REF(machine);
    machine->set_labels(labels);
    CKernel *kernel = new CGaussianKernel();
    SG_REF(kernel);
    machine->set_kernel(kernel);

    CMulticlassLibLinear *svm = new CMulticlassLibLinear();

    machine->set_machine_for_confusion_matrix(svm);
    machine->train(features);

    CMulticlassLabels* output = CLabelsFactory::to_multiclass(machine->apply());

    CMulticlassAccuracy *evaluator = new CMulticlassAccuracy();
    SG_SPRINT("Accuracy = %.4f\n", evaluator->evaluate(output, labels));

    // Free resources
    SG_UNREF(machine);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
    SG_UNREF(evaluator);
    SG_UNREF(kernel);

    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/DenseSubsetFeatures.h>
#include <shogun/base/init.h>
#include <shogun/multiclass/ShareBoost.h>

#define EPSILON 1e-5

using namespace shogun;

const char* fname_feats = "../data/7class_example4_train.dense";
const char* fname_labels = "../data/7class_example4_train.label";

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    SG_SPRINT("Performing ShareBoost on a %d-class problem\n",
        labels->get_num_classes());

    // Create ShareBoost Machine
    CShareBoost *machine = new CShareBoost(features, labels, 10);
    SG_REF(machine);

    machine->train();

    SGVector<int32_t> activeset = machine->get_activeset();
    SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows);
    for (int32_t i=0; i < activeset.vlen; ++i)
        SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]);

    CDenseSubsetFeatures<float64_t> *subset_fea =
        new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset());
    SG_REF(subset_fea);
    CMulticlassLabels* output =
        CLabelsFactory::to_multiclass(machine->apply(subset_fea));

    int32_t correct = 0;
    for (int32_t i=0; i < output->get_num_labels(); ++i)
        if (output->get_int_label(i) == labels->get_int_label(i))
            correct++;
    SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels());

    // Free resources
    SG_UNREF(machine);
    SG_UNREF(output);
    SG_UNREF(subset_fea);
    SG_UNREF(features);
    SG_UNREF(labels);

    exit_shogun();
    return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/MulticlassLibSVM.h>
#include <shogun/base/init.h>

using namespace shogun;

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    index_t num_vec=3;
    index_t num_feat=2;
    index_t num_class=2;

    // create some data
    SGMatrix<float64_t> matrix(num_feat, num_vec);
    SGVector<float64_t>::range_fill_vector(matrix.matrix, num_feat*num_vec);

    // create vectors
    // shogun will now own the matrix created
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);

    // create three labels
    CMulticlassLabels* labels=new CMulticlassLabels(num_vec);
    for (index_t i=0; i<num_vec; ++i)
        labels->set_label(i, i%num_class);

    // create gaussian kernel with cache 10MB, width 0.5
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(features, features);

    // create libsvm with C=10 and train
    CMulticlassLibSVM* svm = new CMulticlassLibSVM(10, kernel, labels);
    svm->train();

    // classify on training examples
    CMulticlassLabels* output=CLabelsFactory::to_multiclass(svm->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
        output->get_num_labels(), "batch output");

    /* assert that batch apply and apply(index_t) give same result */
    for (index_t i=0; i<output->get_num_labels(); ++i)
    {
        float64_t label=svm->apply_one(i);
        SG_SPRINT("single output[%d]=%f\n", i, label);
        ASSERT(output->get_label(i)==label);
    }
    SG_UNREF(output);

    // free up memory
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
#include <shogun/io/CSVFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/MulticlassOneVsOneStrategy.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* file data */
const char fname_feats[]="../data/fm_train_real.dat";
const char fname_labels[]="../data/label_train_multiclass.dat";

void test()
{
    /* dense features from matrix */
    CCSVFile* feature_file = new CCSVFile(fname_feats);
    SGMatrix<float64_t> mat=SGMatrix<float64_t>();
    mat.load(feature_file);
    SG_UNREF(feature_file);

    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
    SG_REF(features);

    /* labels from vector */
    CCSVFile* label_file = new CCSVFile(fname_labels);
    SGVector<float64_t> label_vec;
    label_vec.load(label_file);
    SG_UNREF(label_file);

    CMulticlassLabels* labels=new CMulticlassLabels(label_vec);
    SG_REF(labels);

    // Create liblinear svm classifier with L2-regularized L2-loss
    CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
    SG_REF(svm);

    // Add some configuration to the svm
    svm->set_epsilon(EPSILON);
    svm->set_bias_enabled(true);

    // Create a multiclass svm classifier that consists of several of the previous one
    CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
        new CMulticlassOneVsOneStrategy(),
        (CDotFeatures*) features, svm, labels);
    SG_REF(mc_svm);

    // Train the multiclass machine using the data passed in the constructor
    mc_svm->train();

    // Classify the training examples and show the results
    CMulticlassLabels* output = CLabelsFactory::to_multiclass(mc_svm->apply());
    SGVector< int32_t > out_labels = output->get_int_labels();
    SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);

    // Free resources
    SG_UNREF(mc_svm);
    SG_UNREF(svm);
    SG_UNREF(output);
    SG_UNREF(features);
    SG_UNREF(labels);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    sg_io->set_loglevel(MSG_DEBUG);
    test();
    exit_shogun();
    return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/classifier/NearestCentroid.h>
#include <shogun/base/init.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main()
{
    init_shogun(&print_message);

    index_t num_vec=7;
    index_t num_feat=2;
    index_t num_class=2;

    // create some data
    SGMatrix<float64_t> matrix(num_feat, num_vec);
    SGVector<float64_t>::range_fill_vector(matrix.matrix, num_feat*num_vec);

    // create features; shogun will now own the matrix created
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
    matrix.display_matrix();

    // create labels
    CMulticlassLabels* labels=new CMulticlassLabels(num_vec);
    for (index_t i=0; i<num_vec; ++i)
        labels->set_label(i, i%num_class);

    // create Euclidean distance
    CEuclideanDistance* distance = new CEuclideanDistance(features,features);

    // create nearest centroid classifier and train
    CNearestCentroid* nearest_centroid = new CNearestCentroid(distance, labels);
    nearest_centroid->train();

    // classify on training examples
    CMulticlassLabels* output=CLabelsFactory::to_multiclass(nearest_centroid->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
        output->get_num_labels(), "batch output");
    SG_UNREF(output);

    // free up memory
    SG_UNREF(nearest_centroid);

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/mathematics/Math.h> #include <shogun/classifier/svm/NewtonSVM.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc,char *argv[]) { init_shogun(&print_message,&print_message,&print_message); // initialise shogun with print callbacks, otherwise it cannot produce output int32_t x_n=4,x_d=2; // x_n: number of data points, x_d: dimensionality of the data SGMatrix<float64_t> fmatrix(x_d,x_n); SG_SPRINT("\nTEST 1:\n\n"); /* initialise feature matrix */ for (int i=0; i<x_n*x_d; i++) fmatrix.matrix[i] = i+1; SG_SPRINT("FEATURE MATRIX :\n"); SGMatrix<float64_t>::display_matrix(fmatrix.matrix,x_d,x_n); CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(fmatrix); SG_REF(features); /* create labels, two classes */ CBinaryLabels* labels=new CBinaryLabels(x_n); labels->set_label(0,1); labels->set_label(1,-1); labels->set_label(2,1); labels->set_label(3,1); SG_REF(labels); /* Working with Newton SVM */ float64_t lambda=1.0; int32_t iter=20; CNewtonSVM *nsvm = new CNewtonSVM(lambda,features,labels,iter); SG_REF(nsvm); nsvm->train(); SG_UNREF(labels); SG_UNREF(nsvm); SG_SPRINT("TEST 2:\n\n"); x_n=5; x_d=3; SGMatrix<float64_t> fmatrix2(x_d,x_n); for (int i=0; i<x_n*x_d; i++) fmatrix2.matrix[i] = i+1; SG_SPRINT("FEATURE MATRIX :\n"); SGMatrix<float64_t>::display_matrix(fmatrix2.matrix,x_d,x_n); features->set_feature_matrix(fmatrix2); /* create labels, two classes */ CBinaryLabels* labels2=new CBinaryLabels(x_n); labels2->set_label(0,1); labels2->set_label(1,-1); labels2->set_label(2,1); labels2->set_label(3,1); labels2->set_label(4,-1); SG_REF(labels2); /* Working with Newton SVM */ lambda=1.0; iter=20; CNewtonSVM *nsvm2 = new CNewtonSVM(lambda,features,labels2,iter); SG_REF(nsvm2); nsvm2->train(); SG_UNREF(labels2); SG_UNREF(nsvm2); SG_UNREF(features); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Fernando Jose Iglesias Garcia * Copyright (C) 2012 Fernando Jose Iglesias Garcia */ #include <shogun/base/init.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/multiclass/QDA.h> #include <shogun/features/DenseFeatures.h> #include <shogun/io/SGIO.h> #include <shogun/lib/common.h> #include <shogun/features/DataGenerator.h> using namespace shogun; #define NUM 50 #define DIMS 2 #define CLASSES 2 void test() { #ifdef HAVE_LAPACK SGVector< float64_t > lab(CLASSES*NUM); SGMatrix< float64_t > feat(DIMS, CLASSES*NUM); feat = CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS); for( int i = 0 ; i < CLASSES ; ++i ) for( int j = 0 ; j < NUM ; ++j ) lab[i*NUM+j] = double(i); // Create train labels CMulticlassLabels* labels = new CMulticlassLabels(lab); // Create train features CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feat); // Create QDA classifier CQDA* qda = new CQDA(features, labels); SG_REF(qda); qda->train(); // Classify and display output CMulticlassLabels* output = CLabelsFactory::to_multiclass(qda->apply()); SG_REF(output); SGVector<float64_t>::display_vector(output->get_labels().vector, output->get_num_labels()); // Free memory SG_UNREF(output); SG_UNREF(qda); #endif // HAVE_LAPACK } int main(int argc, char ** argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/lib/config.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/StringFeatures.h> #include <shogun/classifier/svm/SVMLight.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/lib/SGStringList.h> using namespace shogun; #ifdef USE_SVMLIGHT void test_svmlight() { /* data is random length strings with only zeros (A) or ones (B) */ index_t num_train=100; index_t num_test=50; index_t max_length=100; float64_t p_x=0.5; // probability for class A float64_t mostly_prob=0.8; CDenseLabels* labels=new CBinaryLabels(num_train+num_test); CMath::init_random(17); SGStringList<char> data(num_train+num_test, max_length); for (index_t i=0; i<num_train+num_test; ++i) { /* determine length */ index_t length=CMath::random(1, max_length); /* allocate string */ data.strings[i]=SGString<char>(length); /* fill with elements and set label */ if (p_x<CMath::random(0.0, 1.0)) { labels->set_label(i, 1); for (index_t j=0; j<length; ++j) { char c=mostly_prob<CMath::random(0.0, 1.0) ? '0' : '1'; data.strings[i].string[j]=c; } } else { labels->set_label(i, -1); for (index_t j=0; j<length; ++j) { char c=mostly_prob<CMath::random(0.0, 1.0) ? '1' : '0'; data.strings[i].string[j]=c; } } SG_SPRINT("datum %d, class %d:\t", i, labels->get_int_label(i)); for (index_t j=0; j<length; ++j) SG_SPRINT("%c", data.strings[i].string[j]); SG_SPRINT("\n"); } CStringFeatures<char>* feats=new CStringFeatures<char>(data, BINARY); /* copy training and test data */ SGVector<index_t> train_inds(num_train); train_inds.range_fill(); SGVector<index_t> test_inds(num_test); test_inds.range_fill(); test_inds.add(num_train); CStringFeatures<char>* feats_train= (CStringFeatures<char>*)feats->copy_subset(train_inds); CStringFeatures<char>* feats_test= (CStringFeatures<char>*)feats->copy_subset(test_inds); labels->add_subset(train_inds); CLabels* labels_train=new CBinaryLabels(labels->get_labels_copy()); labels->remove_subset(); labels->add_subset(test_inds); CLabels* labels_test=new CBinaryLabels(labels->get_labels_copy()); labels->remove_subset(); /* string kernel */ CDistantSegmentsKernel* kernel=new CDistantSegmentsKernel(10, 2, 2); /* SVM training and testing without precomputing the kernel */ float64_t C=1; CSVM* svm=new CSVMLight(C, kernel, labels_train); // CSVM* svm=new CLibSVM(C, kernel, labels_train); svm->parallel->set_num_threads(1); svm->set_store_model_features(false); svm->train(feats_train); SGVector<float64_t> alphas=svm->get_alphas(); SGVector<index_t> svs=svm->get_support_vectors(); float64_t bias=svm->get_bias(); CBinaryLabels* predictions=(CBinaryLabels*)svm->apply(feats_test); alphas.display_vector("alphas"); svs.display_vector("svs"); SG_SPRINT("bias: %f\n", bias); /* now the same with a precomputed kernel */ kernel->init(feats, feats); CCustomKernel* precomputed=new CCustomKernel(kernel); precomputed->add_row_subset(train_inds); precomputed->add_col_subset(train_inds); SGMatrix<float64_t> km_train=precomputed->get_kernel_matrix(); precomputed->remove_col_subset(); precomputed->add_col_subset(test_inds); SGMatrix<float64_t> km_test=precomputed->get_kernel_matrix(); 
precomputed->remove_row_subset(); precomputed->remove_col_subset(); SGMatrix<float64_t> km=precomputed->get_kernel_matrix(); // km.display_matrix("FULL"); // km_train.display_matrix("TRAIN"); // km_test.display_matrix("TEST"); /* make sure matrices are correct */ for (index_t i=0; i<km_train.num_rows; ++i) { for (index_t j=0; j<km_train.num_cols; ++j) ASSERT(km_train(i, j)==km(i, j)); } for (index_t i=0; i<km_test.num_rows; ++i) { for (index_t j=0; j<km_test.num_cols; ++j) ASSERT(km_test(i, j)==km(i, j+num_train)); } /* train and test again on custom kernel */ svm->set_kernel(new CCustomKernel(km_train)); svm->train(); SGVector<float64_t> alphas_precomputed=svm->get_alphas(); SGVector<index_t> svs_precomputed=svm->get_support_vectors(); float64_t bias_precomputed=svm->get_bias(); alphas_precomputed.display_vector("alphas_precomputed"); svs_precomputed.display_vector("svs_precomputed"); SG_SPRINT("bias_precomputed: %f\n", bias_precomputed); svm->set_kernel(new CCustomKernel(km_test)); CBinaryLabels* predictions_precomputed=(CBinaryLabels*)svm->apply(); /* assert that the SVs, alphas and b are equal; sort before comparing (they may have a different ordering) */ CMath::qsort(alphas.vector, alphas.vlen); CMath::qsort(alphas_precomputed.vector, alphas_precomputed.vlen); CMath::qsort(svs.vector, svs.vlen); CMath::qsort(svs_precomputed.vector, svs_precomputed.vlen); ASSERT(alphas.vlen==alphas_precomputed.vlen); ASSERT(svs.vlen==svs_precomputed.vlen); for (index_t i=0; i<alphas.vlen; ++i) { ASSERT(CMath::abs(alphas[i]-alphas_precomputed[i])<1E-3); ASSERT(svs[i]==svs_precomputed[i]); } ASSERT(CMath::abs(bias-bias_precomputed)<1E-3); /* assert that predictions are the same */ predictions->get_int_labels().display_vector("predictions"); predictions_precomputed->get_int_labels().display_vector("predictions_precomputed"); for (index_t i=0; i<predictions->get_num_labels(); ++i) { ASSERT(predictions->get_int_label(i)== predictions_precomputed->get_int_label(i)); } /* clean up */ SG_SPRINT("cleaning up\n"); SG_UNREF(svm); SG_UNREF(precomputed); SG_UNREF(labels); SG_UNREF(labels_test); SG_UNREF(predictions); SG_UNREF(predictions_precomputed); SG_UNREF(feats_train); SG_UNREF(feats_test); } int main() { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_svmlight(); exit_shogun(); return 0; } #else int main(int argc, char **argv) { return 0; } #endif
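The second half of the listing above generalizes to any situation where kernel values are already available as a matrix: wrap the matrix in a CCustomKernel and hand it to any kernel machine. A minimal self-contained sketch of that pattern; the toy identity kernel matrix and the labels are made up purely for illustration:

#include <shogun/base/init.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/LibSVM.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	/* toy 4x4 precomputed kernel matrix: identity, for illustration only */
	SGMatrix<float64_t> km(4, 4);
	km.zero();
	for (index_t i=0; i<4; ++i)
		km(i, i)=1.0;

	/* made-up labels, two per class */
	SGVector<float64_t> lab(4);
	lab[0]=-1; lab[1]=-1; lab[2]=1; lab[3]=1;
	CBinaryLabels* labels=new CBinaryLabels(lab);

	/* any kernel machine accepts the wrapped matrix like a normal kernel */
	CLibSVM* svm=new CLibSVM(1.0, new CCustomKernel(km), labels);
	svm->train();

	CBinaryLabels* predictions=(CBinaryLabels*)svm->apply();
	predictions->get_labels().display_vector("predictions");

	SG_UNREF(predictions);
	SG_UNREF(svm);
	exit_shogun();
	return 0;
}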
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/clustering/KMeans.h> #include <shogun/distance/EuclideanDistance.h> #include <shogun/distance/MinkowskiMetric.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); int32_t num_clusters=4; int32_t num_features=11; int32_t dim_features=3; int32_t num_vectors_per_cluster=5; float64_t cluster_std_dev=2.0; /* build random cluster centers */ SGMatrix<float64_t> cluster_centers(dim_features, num_clusters); SGVector<float64_t>::random_vector(cluster_centers.matrix, dim_features*num_clusters, -10.0, 10.0); SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows, cluster_centers.num_cols, "cluster centers"); /* create data around clusters */ SGMatrix<float64_t> data(dim_features, num_clusters*num_vectors_per_cluster); for (index_t i=0; i<num_clusters; ++i) { for (index_t j=0; j<dim_features; ++j) { for (index_t k=0; k<num_vectors_per_cluster; ++k) { index_t idx=i*dim_features*num_vectors_per_cluster; idx+=j; idx+=k*dim_features; float64_t entry=cluster_centers.matrix[i*dim_features+j]; data.matrix[idx]=CMath::normal_random(entry, cluster_std_dev); } } } /* create features, SG_REF to avoid deletion */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(data); SG_REF(features); /* create labels for cluster centers */ CMulticlassLabels* labels=new CMulticlassLabels(num_features); for (index_t i=0; i<num_features; ++i) labels->set_label(i, i%2==0 ? 0 : 1); /* create distance */ CEuclideanDistance* distance=new CEuclideanDistance(features, features); /* create distance machine */ CKMeans* clustering=new CKMeans(num_clusters, distance); clustering->train(features); /* build clusters */ CMulticlassLabels* result=CLabelsFactory::to_multiclass(clustering->apply()); for (index_t i=0; i<result->get_num_labels(); ++i) SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i)); /* print cluster centers */ CDenseFeatures<float64_t>* centers= (CDenseFeatures<float64_t>*)distance->get_lhs(); SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix(); SGMatrix<float64_t>::display_matrix(centers_matrix.matrix, centers_matrix.num_rows, centers_matrix.num_cols, "learned centers"); SGMatrix<float64_t>::display_matrix(cluster_centers.matrix, cluster_centers.num_rows, cluster_centers.num_cols, "real centers"); /* clean up */ SG_UNREF(result); SG_UNREF(centers); SG_UNREF(clustering); SG_UNREF(labels); SG_UNREF(features); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/DiffusionMaps.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CDiffusionMaps* dmaps = new CDiffusionMaps(); dmaps->set_target_dim(2); dmaps->set_t(10); dmaps->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = dmaps->embed(features); SG_UNREF(embedding); SG_UNREF(dmaps); SG_UNREF(features); exit_shogun(); return 0; }
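This and the following converter listings all share one pattern, inherited from the common CEmbeddingConverter base class: construct a converter, set its parameters, call embed(). A minimal sketch written against only that base-class interface; the helper function embed_with is made up for illustration:

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/EmbeddingConverter.h>
#include <shogun/converter/MultidimensionalScaling.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* made-up helper: any converter derived from CEmbeddingConverter works here */
CDenseFeatures<float64_t>* embed_with(CEmbeddingConverter* converter,
	CDenseFeatures<float64_t>* features, int32_t dim)
{
	converter->set_target_dim(dim);
	return converter->embed(features);
}

int main()
{
	init_shogun_with_defaults();

	/* small toy data set: 3 dimensions, 20 vectors */
	SGMatrix<float64_t> mat(3, 20);
	for (index_t i=0; i<mat.num_rows*mat.num_cols; ++i)
		mat.matrix[i]=CMath::sin(i/10.0);
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
	SG_REF(features);

	CEmbeddingConverter* converter=new CMultidimensionalScaling();
	CDenseFeatures<float64_t>* embedding=embed_with(converter, features, 2);

	SG_UNREF(embedding);
	SG_UNREF(converter);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}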
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Sergey Lisitsyn */ #include <shogun/lib/config.h> #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/FactorAnalysis.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CFactorAnalysis* fa = new CFactorAnalysis(); CDenseFeatures<double>* embedding = fa->embed(features); SG_UNREF(embedding); SG_UNREF(fa); SG_UNREF(features); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/HessianLocallyLinearEmbedding.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CHessianLocallyLinearEmbedding* hlle = new CHessianLocallyLinearEmbedding(); hlle->set_target_dim(2); hlle->set_k(8); hlle->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = hlle->embed(features); SG_UNREF(embedding); SG_UNREF(hlle); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/Isomap.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CIsomap* isomap = new CIsomap(); isomap->set_target_dim(2); isomap->set_landmark(false); isomap->set_k(4); isomap->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = isomap->embed(features); SG_UNREF(embedding); SG_UNREF(isomap); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Kevin Hughes * * Thanks to Andreas Ziehe */ #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <iostream> using namespace shogun; #include <shogun/features/DenseFeatures.h> #include <shogun/mathematics/Math.h> #include <shogun/mathematics/eigen3.h> #include <shogun/converter/ica/Jade.h> #include <shogun/evaluation/ica/PermutationMatrix.h> #include <shogun/evaluation/ica/AmariIndex.h> using namespace Eigen; void test() { // Generate sample data CMath::init_random(0); int n_samples = 2000; VectorXd time(n_samples); time.setLinSpaced(n_samples,0,10); // Source Signals MatrixXd S(2,n_samples); for(int i = 0; i < n_samples; i++) { // Sine wave S(0,i) = sin(2*time[i]); S(0,i) += 0.2*CMath::randn_double(); // Square wave S(1,i) = sin(3*time[i]) < 0 ? -1 : 1; S(1,i) += 0.2*CMath::randn_double(); } // Scale each source to unit variance (the sources are approximately zero-mean) VectorXd avg = S.rowwise().sum() / n_samples; VectorXd std = ((S.colwise() - avg).array().pow(2).rowwise().sum() / n_samples).array().sqrt(); for(int i = 0; i < n_samples; i++) S.col(i) = S.col(i).cwiseQuotient(std); // Mixing Matrix SGMatrix<float64_t> mixing_matrix(2,2); Map<MatrixXd> A(mixing_matrix.matrix,2,2); A(0,0) = 1; A(0,1) = 0.5; A(1,0) = 0.5; A(1,1) = 1; std::cout << "Mixing Matrix:" << std::endl; std::cout << A << std::endl << std::endl; // Mix signals SGMatrix<float64_t> X(2,n_samples); Map<MatrixXd> EX(X.matrix,2,n_samples); EX = A * S; CDenseFeatures< float64_t >* mixed_signals = new CDenseFeatures< float64_t >(X); // Separate CJade* jade = new CJade(); SG_REF(jade); CFeatures* signals = jade->apply(mixed_signals); SG_REF(signals); // Close to a permutation matrix (with random scales) Map<MatrixXd> EA(jade->get_mixing_matrix().matrix,2,2); std::cout << "Estimated Mixing Matrix:" << std::endl; std::cout << EA << std::endl << std::endl; SGMatrix<float64_t> P(2,2); Eigen::Map<MatrixXd> EP(P.matrix,2,2); EP = EA.inverse() * A; bool isperm = is_permutation_matrix(P); std::cout << "EA^-1 * A == Permutation Matrix is: " << isperm << std::endl; float64_t amari_err = amari_index(jade->get_mixing_matrix(), mixing_matrix, true); std::cout << "Amari Error: " << amari_err << std::endl; SG_UNREF(jade); SG_UNREF(mixed_signals); SG_UNREF(signals); return; } int main(int argc, char ** argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/KernelLocallyLinearEmbedding.h> #include <shogun/kernel/LinearKernel.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CKernelLocallyLinearEmbedding* klle = new CKernelLocallyLinearEmbedding(); CKernel* kernel = new CLinearKernel(); klle->set_target_dim(2); klle->set_k(4); klle->set_kernel(kernel); klle->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = klle->embed(features); SG_UNREF(embedding); SG_UNREF(klle); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/LaplacianEigenmaps.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CLaplacianEigenmaps* lem = new CLaplacianEigenmaps(); lem->set_target_dim(2); lem->set_k(10); lem->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = lem->embed(features); SG_UNREF(embedding); SG_UNREF(lem); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/LinearLocalTangentSpaceAlignment.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CLinearLocalTangentSpaceAlignment* lltsa = new CLinearLocalTangentSpaceAlignment(); lltsa->set_target_dim(2); lltsa->set_k(4); lltsa->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = lltsa->embed(features); SG_UNREF(embedding); SG_UNREF(lltsa); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/LocalityPreservingProjections.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CLocalityPreservingProjections* lpp = new CLocalityPreservingProjections(); lpp->set_target_dim(2); lpp->set_k(10); lpp->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = lpp->embed(features); SG_UNREF(embedding); SG_UNREF(lpp); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/LocallyLinearEmbedding.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CLocallyLinearEmbedding* lle = new CLocallyLinearEmbedding(); lle->set_target_dim(2); lle->set_k(4); lle->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = lle->embed(features); SG_UNREF(embedding); SG_UNREF(lle); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/LocalTangentSpaceAlignment.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CLocalTangentSpaceAlignment* ltsa = new CLocalTangentSpaceAlignment(); ltsa->set_target_dim(2); ltsa->set_k(4); ltsa->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = ltsa->embed(features); SG_UNREF(embedding); SG_UNREF(ltsa); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/MultidimensionalScaling.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CMultidimensionalScaling* mds = new CMultidimensionalScaling(); mds->set_target_dim(2); mds->set_landmark(true); mds->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = mds->embed(features); SG_UNREF(embedding); SG_UNREF(mds); SG_UNREF(features); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/NeighborhoodPreservingEmbedding.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); int N = 100; int dim = 3; float64_t* matrix = new double[N*dim]; for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures<double>* features = new CDenseFeatures<double>(SGMatrix<double>(matrix,dim,N)); SG_REF(features); CNeighborhoodPreservingEmbedding* npe = new CNeighborhoodPreservingEmbedding(); npe->set_target_dim(2); npe->set_k(15); npe->parallel->set_num_threads(4); CDenseFeatures<double>* embedding = npe->embed(features); SG_UNREF(embedding); SG_UNREF(npe); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Fernando José Iglesias García * Copyright (C) 2012 Fernando José Iglesias García */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/converter/StochasticProximityEmbedding.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main() { init_shogun_with_defaults(); int N = 100; int dim = 3; // Generate toy data SGMatrix< float64_t > matrix(dim, N); for (int i=0; i<N*dim; i++) matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14); CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(matrix); SG_REF(features); // Create embedding and set parameters for global strategy CStochasticProximityEmbedding* spe = new CStochasticProximityEmbedding(); spe->set_target_dim(2); spe->set_strategy(SPE_GLOBAL); spe->set_nupdates(40); SG_REF(spe); // Apply embedding with global strategy CDenseFeatures< float64_t >* embedding = spe->embed(features); SG_REF(embedding); // Set parameters for local strategy spe->set_strategy(SPE_LOCAL); spe->set_k(12); // Apply embedding with local strategy SG_UNREF(embedding); embedding = spe->embed(features); SG_REF(embedding); // Free memory SG_UNREF(embedding); SG_UNREF(spe); SG_UNREF(features); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char** argv) { return 0; } #endif //USE_GPL_SHOGUN
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_cross_validation() { /* data matrix dimensions */ index_t num_vectors=40; index_t num_features=5; /* data means -1, 1 in all components, std deviation of 3 */ SGVector<float64_t> mean_1(num_features); SGVector<float64_t> mean_2(num_features); SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0); SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0); float64_t sigma=3; SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1"); SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2"); /* fill data matrix around mean */ SGMatrix<float64_t> train_dat(num_features, num_vectors); for (index_t i=0; i<num_vectors; ++i) { for (index_t j=0; j<num_features; ++j) { float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0]; train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma); } } /* training features */ CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(train_dat); SG_REF(features); /* training labels +/- 1 for each cluster */ SGVector<float64_t> lab(num_vectors); for (index_t i=0; i<num_vectors; ++i) lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0; CBinaryLabels* labels=new CBinaryLabels(lab); /* gaussian kernel */ int32_t kernel_cache=100; int32_t width=10; CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, width); kernel->init(features, features); /* create svm via libsvm */ float64_t svm_C=10; float64_t svm_eps=0.0001; CLibSVM* svm=new CLibSVM(svm_C, kernel, labels); svm->set_epsilon(svm_eps); /* train and output */ svm->train(features); CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply(features)); for (index_t i=0; i<num_vectors; ++i) SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i)); /* evaluation criterion */ CContingencyTableEvaluation* eval_crit= new CContingencyTableEvaluation(ACCURACY); /* evaluate training accuracy */ float64_t eval_result=eval_crit->evaluate(output, labels); SG_SPRINT("training accuracy: %f\n", eval_result); SG_UNREF(output); /* assert that classification "works". this is not guaranteed to always work * but should be a really coarse check to see if everything is going * approx. 
right */ ASSERT(eval_result<2); /* splitting strategy */ index_t n_folds=5; CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, n_folds); /* cross validation instance, 10 runs, 95% confidence interval */ CCrossValidation* cross=new CCrossValidation(svm, features, labels, splitting, eval_crit); cross->set_num_runs(10); // cross->set_conf_int_alpha(0.05); /* actual evaluation */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CrossValidationResult!"); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(cross); SG_UNREF(features); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); sg_io->set_loglevel(MSG_DEBUG); test_cross_validation(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society * Written (W) 2013 Saurabh Mahindre */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/multiclass/KNN.h> #include <shogun/io/SGIO.h> #include <shogun/io/CSVFile.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/distance/EuclideanDistance.h> using namespace shogun; // Prepare to read a file for the training data const char fname_feats[] = "../data/fm_train_real.dat"; const char fname_labels[] = "../data/label_train_multiclass.dat"; void test_cross_validation() { index_t k=4; /* dense features from matrix */ CCSVFile* feature_file = new CCSVFile(fname_feats); SGMatrix<float64_t> mat=SGMatrix<float64_t>(); mat.load(feature_file); SG_UNREF(feature_file); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat); SG_REF(features); /* labels from vector */ CCSVFile* label_file = new CCSVFile(fname_labels); SGVector<float64_t> label_vec; label_vec.load(label_file); SG_UNREF(label_file); CMulticlassLabels* labels=new CMulticlassLabels(label_vec); SG_REF(labels); /* create knn */ CEuclideanDistance* distance = new CEuclideanDistance(features, features); CKNN* knn=new CKNN(k, distance, labels); /* train and output */ knn->train(features); CMulticlassLabels* output=CLabelsFactory::to_multiclass(knn->apply(features)); for (index_t i=0; i<features->get_num_vectors(); ++i) SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i)); /* evaluation criterion */ CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy(); /* evaluate training accuracy */ float64_t eval_result=eval_crit->evaluate(output, labels); SG_SPRINT("training accuracy: %f\n", eval_result); SG_UNREF(output); /* assert that classification "works". this is not guaranteed to always work * but should be a really coarse check to see if everything is going * approx. right */ ASSERT(eval_result<2); /* splitting strategy */ index_t n_folds=5; CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, n_folds); /* cross validation instance, 1 run */ CCrossValidation* cross=new CCrossValidation(knn, features, labels, splitting, eval_crit); cross->set_num_runs(1); // cross->set_conf_int_alpha(0.05); /* actual evaluation */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(cross); SG_UNREF(features); SG_UNREF(labels); } int main(int argc, char **argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); test_cross_validation(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/classifier/svm/SVMLight.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/lib/Time.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_cross_validation() { /* data matrix dimensions */ index_t num_vectors=50; index_t num_features=5; /* data means -1, 1 in all components, std deviation of sigma */ SGVector<float64_t> mean_1(num_features); SGVector<float64_t> mean_2(num_features); SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0); SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0); float64_t sigma=1.5; /* fill data matrix around mean */ SGMatrix<float64_t> train_dat(num_features, num_vectors); for (index_t i=0; i<num_vectors; ++i) { for (index_t j=0; j<num_features; ++j) { float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0]; train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma); } } /* training features */ CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(train_dat); SG_REF(features); /* training labels +/- 1 for each cluster */ SGVector<float64_t> lab(num_vectors); for (index_t i=0; i<num_vectors; ++i) lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0; CBinaryLabels* labels=new CBinaryLabels(lab); /* gaussian kernel */ CGaussianKernel* kernel=new CGaussianKernel(); kernel->set_width(10); kernel->init(features, features); /* create svm via libsvm */ float64_t svm_C=1; float64_t svm_eps=0.0001; CSVM* svm=new CLibSVM(svm_C, kernel, labels); svm->set_epsilon(svm_eps); /* train and output the normal way */ SG_SPRINT("starting normal training\n"); svm->train(features); CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply(features)); /* evaluation criterion */ CContingencyTableEvaluation* eval_crit= new CContingencyTableEvaluation(ACCURACY); /* evaluate training accuracy */ float64_t eval_result=eval_crit->evaluate(output, labels); SG_SPRINT("training accuracy: %f\n", eval_result); SG_UNREF(output); /* assert that classification "works". this is not guaranteed to always work * but should be a really coarse check to see if everything is going * approx. 
right */ ASSERT(eval_result<2); /* splitting strategy */ index_t n_folds=3; CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, n_folds); /* cross validation instance, 5 runs */ CCrossValidation* cross=new CCrossValidation(svm, features, labels, splitting, eval_crit); cross->set_num_runs(5); // cross->set_conf_int_alpha(0.05); CCrossValidationResult* tmp; /* no locking */ index_t repetitions=5; SG_SPRINT("unlocked x-val\n"); kernel->init(features, features); cross->set_autolock(false); CTime time; time.start(); for (index_t i=0; i<repetitions; ++i) { tmp = (CCrossValidationResult*)cross->evaluate(); SG_UNREF(tmp); } time.stop(); SG_SPRINT("%f sec\n", time.cur_time_diff()); /* auto-locking in every iteration of this loop (convenient, but locks and unlocks repeatedly) */ SG_SPRINT("locked in every iteration x-val\n"); cross->set_autolock(true); time.start(); for (index_t i=0; i<repetitions; ++i) { tmp = (CCrossValidationResult*)cross->evaluate(); SG_UNREF(tmp); } time.stop(); SG_SPRINT("%f sec\n", time.cur_time_diff()); /* lock once before the loop (no locking/unlocking inside) */ svm->data_lock(labels, features); SG_SPRINT("locked x-val\n"); time.start(); for (index_t i=0; i<repetitions; ++i) { tmp = (CCrossValidationResult*)cross->evaluate(); SG_UNREF(tmp); } time.stop(); SG_SPRINT("%f sec\n", time.cur_time_diff()); /* clean up */ SG_UNREF(cross); SG_UNREF(features); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_cross_validation(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CombinedKernel.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/mkl/MKLClassification.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/CrossValidationPrintOutput.h> #include <shogun/evaluation/CrossValidationMKLStorage.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/mathematics/Statistics.h> using namespace shogun; void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat, float64_t dist) { index_t dims=feat.num_rows; index_t num=lab.vlen; for (int32_t i=0; i<num; i++) { if (i<num/2) { lab[i]=-1.0; for (int32_t j=0; j<dims; j++) feat(j, i)=CMath::random(0.0, 1.0)+dist; } else { lab[i]=1.0; for (int32_t j=0; j<dims; j++) feat(j, i)=CMath::random(0.0, 1.0)-dist; } } lab.display_vector("lab"); feat.display_matrix("feat"); } void test_mkl_cross_validation() { /* generate random data */ index_t num=10; index_t dims=2; float64_t dist=0.5; SGVector<float64_t> lab(num); SGMatrix<float64_t> feat(dims, num); gen_rand_data(lab, feat, dist); /*create train labels */ CLabels* labels=new CBinaryLabels(lab); /* create train features */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); features->set_feature_matrix(feat); SG_REF(features); /* create combined features */ CCombinedFeatures* comb_features=new CCombinedFeatures(); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); SG_REF(comb_features); /* create multiple gaussian kernels */ CCombinedKernel* kernel=new CCombinedKernel(); kernel->append_kernel(new CGaussianKernel(10, 0.1)); kernel->append_kernel(new CGaussianKernel(10, 1)); kernel->append_kernel(new CGaussianKernel(10, 2)); kernel->init(comb_features, comb_features); SG_REF(kernel); /* create mkl using libsvm, due to a mem-bug, interleaved is not possible */ CMKLClassification* svm=new CMKLClassification(new CLibSVM()); svm->set_interleaved_optimization_enabled(false); svm->set_kernel(kernel); SG_REF(svm); /* create cross-validation instance */ index_t num_folds=3; CSplittingStrategy* split=new CStratifiedCrossValidationSplitting(labels, num_folds); CEvaluation* eval=new CContingencyTableEvaluation(ACCURACY); CCrossValidation* cross=new CCrossValidation(svm, comb_features, labels, split, eval, false); /* add print output listener and mkl storage listener */ cross->add_cross_validation_output(new CCrossValidationPrintOutput()); CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage(); cross->add_cross_validation_output(mkl_storage); /* perform cross-validation, this will print loads of information * (caused by the CCrossValidationPrintOutput instance attached to it) */ CEvaluationResult* result=cross->evaluate(); /* print mkl weights */ SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights(); weights.display_matrix("mkl weights"); /* print mean and variance of each kernel weight. 
These could, for example, be used to compute confidence intervals */ CStatistics::matrix_mean(weights, false).display_vector("mean per kernel"); CStatistics::matrix_variance(weights, false).display_vector("variance per kernel"); CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel"); SG_UNREF(result); /* again for two runs */ cross->set_num_runs(2); result=cross->evaluate(); /* print mkl weights */ weights=mkl_storage->get_mkl_weights(); weights.display_matrix("mkl weights"); /* print mean and variance of each kernel weight. These could, for example, be used to compute confidence intervals */ CStatistics::matrix_mean(weights, false).display_vector("mean per kernel"); CStatistics::matrix_variance(weights, false).display_vector("variance per kernel"); CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel"); /* clean up */ SG_UNREF(result); SG_UNREF(cross); SG_UNREF(kernel); SG_UNREF(features); SG_UNREF(comb_features); SG_UNREF(svm); } int main() { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_mkl_cross_validation(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/multiclass/MulticlassLibLinear.h> #include <shogun/io/SGIO.h> #include <shogun/io/CSVFile.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/MulticlassAccuracy.h> using namespace shogun; // Prepare to read a file for the training data const char fname_feats[] = "../data/fm_train_real.dat"; const char fname_labels[] = "../data/label_train_multiclass.dat"; void test_cross_validation() { /* dense features from matrix */ CCSVFile* feature_file = new CCSVFile(fname_feats); SGMatrix<float64_t> mat=SGMatrix<float64_t>(); mat.load(feature_file); SG_UNREF(feature_file); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat); SG_REF(features); /* labels from vector */ CCSVFile* label_file = new CCSVFile(fname_labels); SGVector<float64_t> label_vec; label_vec.load(label_file); SG_UNREF(label_file); CMulticlassLabels* labels=new CMulticlassLabels(label_vec); SG_REF(labels); /* create multiclass svm via liblinear */ float64_t svm_C=10; float64_t svm_eps=0.0001; CMulticlassLibLinear* svm=new CMulticlassLibLinear(svm_C, features, labels); svm->set_epsilon(svm_eps); /* train and output */ svm->train(features); CMulticlassLabels* output=CLabelsFactory::to_multiclass(svm->apply(features)); for (index_t i=0; i<features->get_num_vectors(); ++i) SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i)); /* evaluation criterion */ CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy(); /* evaluate training accuracy */ float64_t eval_result=eval_crit->evaluate(output, labels); SG_SPRINT("training accuracy: %f\n", eval_result); SG_UNREF(output); /* assert that classification "works". this is not guaranteed to always work * but should be a really coarse check to see if everything is going * approx. right */ ASSERT(eval_result<2); /* splitting strategy */ index_t n_folds=5; CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, n_folds); /* cross validation instance, 1 run */ CCrossValidation* cross=new CCrossValidation(svm, features, labels, splitting, eval_crit); cross->set_num_runs(1); // cross->set_conf_int_alpha(0.05); /* actual evaluation */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(cross); SG_UNREF(features); SG_UNREF(labels); } int main(int argc, char **argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); test_cross_validation(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 yoo, thereisnoknife@gmail.com * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/io/CSVFile.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/LinearKernel.h> #include <shogun/kernel/PolyKernel.h> #include <shogun/kernel/CombinedKernel.h> #include <shogun/classifier/mkl/MKLMulticlass.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/MulticlassAccuracy.h> using namespace shogun; /* cross-validation instances */ const index_t n_folds=2; const index_t n_runs=2; /* file data */ const char fname_feats[]="../data/fm_train_real.dat"; const char fname_labels[]="../data/label_train_multiclass.dat"; void test_multiclass_mkl_cv() { /* init random number generator for reproducible results of cross-validation in the light of ASSERT(result->mean>0.81); some lines down below */ sg_rand->set_seed(12); /* dense features from matrix */ CCSVFile* feature_file = new CCSVFile(fname_feats); SGMatrix<float64_t> mat=SGMatrix<float64_t>(); mat.load(feature_file); SG_UNREF(feature_file); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat); SG_REF(features); /* labels from vector */ CCSVFile* label_file = new CCSVFile(fname_labels); SGVector<float64_t> label_vec; label_vec.load(label_file); SG_UNREF(label_file); CMulticlassLabels* labels=new CMulticlassLabels(label_vec); SG_REF(labels); /* combined features and kernel */ CCombinedFeatures *cfeats=new CCombinedFeatures(); CCombinedKernel *cker=new CCombinedKernel(); SG_REF(cfeats); SG_REF(cker); /** 1st kernel: gaussian */ cfeats->append_feature_obj(features); cker->append_kernel(new CGaussianKernel(features, features, 1.2, 10)); /** 2nd kernel: linear */ cfeats->append_feature_obj(features); cker->append_kernel(new CLinearKernel(features, features)); /** 3rd kernel: poly */ cfeats->append_feature_obj(features); cker->append_kernel(new CPolyKernel(features, features, 2, true, 10)); cker->init(cfeats, cfeats); /* create mkl instance */ CMKLMulticlass* mkl=new CMKLMulticlass(1.2, cker, labels); SG_REF(mkl); mkl->set_epsilon(0.00001); mkl->parallel->set_num_threads(1); mkl->set_mkl_epsilon(0.001); mkl->set_mkl_norm(1.5); /* train to see weights */ mkl->train(); cker->get_subkernel_weights().display_vector("weights"); CMulticlassAccuracy* eval_crit=new CMulticlassAccuracy(); CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, n_folds); CCrossValidation *cross=new CCrossValidation(mkl, cfeats, labels, splitting, eval_crit); cross->set_autolock(false); cross->set_num_runs(n_runs); // cross->set_conf_int_alpha(0.05); /* perform x-val and print result */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); SG_SPRINT("mean of %d %d-fold x-val runs: %f\n", n_runs, n_folds, result->mean); /* assert high accuracy */ ASSERT(result->mean>0.81); /* clean up */ SG_UNREF(features); SG_UNREF(labels); SG_UNREF(cfeats); SG_UNREF(cker); SG_UNREF(mkl); SG_UNREF(cross); SG_UNREF(result); } int main(int argc, char** argv){ shogun::init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); /* performs 
cross-validation on a multi-class mkl machine */ test_multiclass_mkl_cv(); exit_shogun(); }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/RegressionLabels.h> #include <shogun/kernel/LinearKernel.h> #include <shogun/regression/KernelRidgeRegression.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/CrossValidationSplitting.h> #include <shogun/evaluation/MeanSquaredError.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_cross_validation() { #ifdef HAVE_LAPACK /* data matrix dimensions */ index_t num_vectors=100; index_t num_features=1; /* training label data */ SGVector<float64_t> lab(num_vectors); /* fill data matrix and labels */ SGMatrix<float64_t> train_dat(num_features, num_vectors); SGVector<float64_t>::range_fill_vector(train_dat.matrix, num_vectors); for (index_t i=0; i<num_vectors; ++i) { /* labels are linear plus noise */ lab.vector[i]=i+CMath::normal_random(0, 1.0); } /* training features */ CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(train_dat); SG_REF(features); /* training labels */ CRegressionLabels* labels=new CRegressionLabels(lab); /* kernel */ CLinearKernel* kernel=new CLinearKernel(); kernel->init(features, features); /* kernel ridge regression */ float64_t tau=0.0001; CKernelRidgeRegression* krr=new CKernelRidgeRegression(tau, kernel, labels); /* evaluation criterion */ CMeanSquaredError* eval_crit= new CMeanSquaredError(); /* train and output */ krr->train(features); CRegressionLabels* output= CLabelsFactory::to_regression(krr->apply()); for (index_t i=0; i<num_vectors; ++i) { SG_SPRINT("x=%f, train=%f, predict=%f\n", train_dat.matrix[i], labels->get_label(i), output->get_label(i)); } /* evaluate training error */ float64_t eval_result=eval_crit->evaluate(output, labels); SG_SPRINT("training error: %f\n", eval_result); SG_UNREF(output); /* assert that regression "works". this is not guaranteed to always work * but should be a really coarse check to see if everything is going * approx. right */ ASSERT(eval_result<2); /* splitting strategy */ index_t n_folds=5; CCrossValidationSplitting* splitting= new CCrossValidationSplitting(labels, n_folds); /* cross validation instance, 100 runs */ CCrossValidation* cross=new CCrossValidation(krr, features, labels, splitting, eval_crit); cross->set_num_runs(100); // cross->set_conf_int_alpha(0.05); /* actual evaluation */ CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("cross_validation estimate:\n"); result->print_result(); /* same crude assertion as for above evaluation */ ASSERT(result->mean<2); /* clean up */ SG_UNREF(result); SG_UNREF(cross); SG_UNREF(features); #endif /* HAVE_LAPACK */ } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_cross_validation(); exit_shogun(); return 0; }
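For reference, kernel ridge regression as used above has a closed-form solution; tau is the regularizer added to the diagonal of the kernel matrix. This note is not part of the original listing, but the standard formulation, given kernel matrix K and targets y, is

\alpha = (K + \tau I)^{-1} y, \qquad f(x) = \sum_i \alpha_i \, k(x_i, x)

so a larger tau trades training fit for a better-conditioned linear solve.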
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/Subset.h> using namespace shogun; void test() { SGMatrix<float64_t> data(3, 10); CDenseFeatures<float64_t>* f=new CDenseFeatures<float64_t>(data); SGVector<float64_t>::range_fill_vector(data.matrix, data.num_cols*data.num_rows, 1.0); SGMatrix<float64_t>::display_matrix(data.matrix, data.num_rows, data.num_cols, "original feature data"); index_t offset_subset=1; SGVector<index_t> feature_subset(8); SGVector<index_t>::range_fill_vector(feature_subset.vector, feature_subset.vlen, offset_subset); SGVector<index_t>::display_vector(feature_subset.vector, feature_subset.vlen, "feature subset"); f->add_subset(feature_subset); SG_SPRINT("feature vectors after setting subset on original data:\n"); for (index_t i=0; i<f->get_num_vectors(); ++i) { SGVector<float64_t> vec=f->get_feature_vector(i); SG_SPRINT("%i: ", i); SGVector<float64_t>::display_vector(vec.vector, vec.vlen); f->free_feature_vector(vec, i); } index_t offset_copy=2; SGVector<index_t> feature_copy_subset(4); SGVector<index_t>::range_fill_vector(feature_copy_subset.vector, feature_copy_subset.vlen, offset_copy); SGVector<index_t>::display_vector(feature_copy_subset.vector, feature_copy_subset.vlen, "indices that are to be copied"); CDenseFeatures<float64_t>* subset_copy= (CDenseFeatures<float64_t>*)f->copy_subset(feature_copy_subset); SGMatrix<float64_t> subset_copy_matrix=subset_copy->get_feature_matrix(); SGMatrix<float64_t>::display_matrix(subset_copy_matrix.matrix, subset_copy_matrix.num_rows, subset_copy_matrix.num_cols, "copy matrix"); index_t num_its=subset_copy_matrix.num_rows*subset_copy_matrix.num_cols; for (index_t i=0; i<num_its; ++i) { index_t idx=i+(offset_copy+offset_subset)*subset_copy_matrix.num_rows; ASSERT(subset_copy_matrix.matrix[i]==data.matrix[idx]); } SG_UNREF(f); SG_UNREF(subset_copy); } int main(int argc, char **argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

void test()
{
    index_t num_vectors=10;
    index_t num_dimensions=7;
    index_t num_features=3;

    /* create some sparse data */
    SGSparseMatrix<float64_t> data=
            SGSparseMatrix<float64_t>(num_dimensions, num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
    {
        /* each vector gets entries at every third dimension (0, 3, 6) */
        data.sparse_matrix[i]=SGSparseVector<float64_t>(num_features);

        /* fill */
        for (index_t j=0; j<num_features; ++j)
        {
            data.sparse_matrix[i].features[j].entry=i+j;
            data.sparse_matrix[i].features[j].feat_index=3*j;
        }
    }

    CSparseFeatures<float64_t>* f=new CSparseFeatures<float64_t>(data);

    /* display sparse matrix */
    SG_SPRINT("original data\n");
    for (index_t i=0; i<num_vectors; ++i)
    {
        SG_SPRINT("sparse vector at %i: [", i);
        for (index_t j=0; j<num_features; ++j)
            SG_SPRINT("%f, ", data.sparse_matrix[i].features[j].entry);
        SG_SPRINT("]\n");
    }

    /* indices for a subset */
    index_t offset_subset=1;
    SGVector<index_t> feature_subset(8);
    SGVector<index_t>::range_fill_vector(feature_subset.vector,
            feature_subset.vlen, offset_subset);
    SGVector<index_t>::display_vector(feature_subset.vector,
            feature_subset.vlen, "feature subset");

    /* set subset and print data */
    f->add_subset(feature_subset);
    SG_SPRINT("feature vectors after setting subset on original data:\n");
    for (index_t i=0; i<f->get_num_vectors(); ++i)
    {
        SGSparseVector<float64_t> vec=f->get_sparse_feature_vector(i);
        SG_SPRINT("sparse vector at %i: [", i);
        for (index_t j=0; j<num_features; ++j)
            SG_SPRINT("%f, ", vec.features[j].entry);
        SG_SPRINT("]\n");
        f->free_sparse_feature_vector(i);
    }

    /* indices to copy */
    index_t offset_copy=2;
    SGVector<index_t> feature_copy_subset(4);
    SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
            feature_copy_subset.vlen, offset_copy);
    SGVector<index_t>::display_vector(feature_copy_subset.vector,
            feature_copy_subset.vlen, "indices that are to be copied");

    /* copy a subset of features */
    CSparseFeatures<float64_t>* subset_copy=
            (CSparseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);

    /* print copied subset */
    SG_SPRINT("copied features:\n");
    for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
    {
        SGSparseVector<float64_t> vec=
                subset_copy->get_sparse_feature_vector(i);
        SG_SPRINT("sparse vector at %i: [", i);
        for (index_t j=0; j<num_features; ++j)
            SG_SPRINT("%f, ", vec.features[j].entry);
        SG_SPRINT("]\n");
        subset_copy->free_sparse_feature_vector(i);
    }

    /* test if all elements were copied correctly. since the copy indices are
     * resolved through the active subset (cf. the dense example above),
     * copied vector i corresponds to original index
     * i+offset_copy+offset_subset */
    for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
    {
        SGSparseVector<float64_t> vec=
                subset_copy->get_sparse_feature_vector(i);
        index_t ind=i+offset_copy+offset_subset;

        for (index_t j=0; j<vec.num_feat_entries; ++j)
        {
            float64_t a_entry=vec.features[j].entry;
            float64_t b_entry=data.sparse_matrix[ind].features[j].entry;
            index_t a_idx=vec.features[j].feat_index;
            index_t b_idx=data.sparse_matrix[ind].features[j].feat_index;
            ASSERT(a_entry==b_entry);
            ASSERT(a_idx==b_idx);
        }

        subset_copy->free_sparse_feature_vector(i);
    }

    SG_UNREF(f);
    SG_UNREF(subset_copy);
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    test();

    exit_shogun();
    return 0;
}
#include <shogun/features/DenseFeatures.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); //sg_io->set_loglevel(MSG_DEBUG); //sg_io->enable_file_and_line(); // create three 2-dimensional vectors SGMatrix<float64_t> matrix(2,3); for (int32_t i=0; i<6; i++) matrix.matrix[i]=i; // shogun will now own the matrix created CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix); ASSERT(features->parameter_hash_changed()); SG_UNREF(features); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

const int32_t num_labels=10;
const int32_t num_classes=3;

void test()
{
    const int32_t num_subset_idx=CMath::random(1, num_labels);

    /* create labels */
    CMulticlassLabels* labels=new CMulticlassLabels(num_labels);
    for (index_t i=0; i<num_labels; ++i)
        labels->set_label(i, i%num_classes);

    SG_REF(labels);

    /* print labels */
    SGVector<float64_t> labels_data=labels->get_labels();
    SGVector<float64_t>::display_vector(labels_data.vector, labels_data.vlen,
            "labels");

    /* create subset indices */
    SGVector<index_t> subset_idx(num_subset_idx);
    subset_idx.range_fill();
    CMath::permute(subset_idx);

    /* print subset indices */
    SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen,
            "subset indices");

    /* apply subset to labels */
    SG_SPRINT("\n\n-------------------\n"
            "applying subset to labels\n"
            "-------------------\n");
    labels->add_subset(subset_idx);

    /* check and output */
    ASSERT(labels->get_num_labels()==num_subset_idx);
    SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

    for (index_t i=0; i<labels->get_num_labels(); ++i)
    {
        float64_t label=labels->get_label(i);
        SG_SPRINT("label %f:\n", label);
        ASSERT(label==labels_data.vector[subset_idx.vector[i]]);
    }

    /* remove subset from labels */
    SG_SPRINT("\n\n-------------------\n"
            "removing subset from labels\n"
            "-------------------\n");
    labels->remove_all_subsets();

    /* check and output */
    ASSERT(labels->get_num_labels()==num_labels);
    SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

    for (index_t i=0; i<labels->get_num_labels(); ++i)
    {
        float64_t label=labels->get_label(i);
        SG_SPRINT("label %f:\n", label);
        ASSERT(label==labels_data.vector[i]);
    }

    SG_UNREF(labels);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void check_transposed(CDenseFeatures<int32_t>* features)
{
    CDenseFeatures<int32_t>* transposed=features->get_transposed();
    CDenseFeatures<int32_t>* double_transposed=transposed->get_transposed();

    for (index_t i=0; i<features->get_num_vectors(); ++i)
    {
        SGVector<int32_t> orig_vec=features->get_feature_vector(i);
        SGVector<int32_t> new_vec=double_transposed->get_feature_vector(i);

        ASSERT(orig_vec.vlen==new_vec.vlen);

        for (index_t j=0; j<orig_vec.vlen; j++)
            ASSERT(orig_vec.vector[j]==new_vec.vector[j]);

        /* not necessary since feature matrix is in memory. for documentation */
        features->free_feature_vector(orig_vec, i);
        double_transposed->free_feature_vector(new_vec, i);
    }

    SG_UNREF(transposed);
    SG_UNREF(double_transposed);
}

const int32_t num_vectors=6;
const int32_t dim_features=6;

void test()
{
    const int32_t num_subset_idx=CMath::random(1, num_vectors);

    /* create feature data matrix */
    SGMatrix<int32_t> data(dim_features, num_vectors);

    /* fill matrix with random data */
    for (index_t i=0; i<num_vectors; ++i)
    {
        for (index_t j=0; j<dim_features; ++j)
            data.matrix[i*dim_features+j]=CMath::random(-5, 5);
    }

    /* create simple features */
    CDenseFeatures<int32_t>* features=new CDenseFeatures<int32_t>(data);
    SG_REF(features);

    /* print feature matrix */
    SGMatrix<int32_t>::display_matrix(data.matrix, data.num_rows,
            data.num_cols, "feature matrix");

    /* create subset indices */
    SGVector<index_t> subset_idx(num_subset_idx);
    subset_idx.range_fill();
    CMath::permute(subset_idx);

    /* print subset indices */
    SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen,
            "subset indices");

    /* apply subset to features */
    SG_SPRINT("\n\n-------------------\n"
            "applying subset to features\n"
            "-------------------\n");
    features->add_subset(subset_idx);

    /* check and output */
    ASSERT(features->get_num_vectors()==num_subset_idx);

    /* check get_transposed method */
    SG_SPRINT("checking transpose...");
    check_transposed(features);
    SG_SPRINT("does work\n");

    SG_SPRINT("features->get_num_vectors(): %d\n",
            features->get_num_vectors());
    for (index_t i=0; i<features->get_num_vectors(); ++i)
    {
        SGVector<int32_t> vec=features->get_feature_vector(i);

        SG_SPRINT("vector %d: ", i);
        SGVector<int32_t>::display_vector(vec.vector, vec.vlen);

        /* data is column-major: column k starts at k*dim_features */
        for (index_t j=0; j<dim_features; ++j)
            ASSERT(vec.vector[j]==data.matrix[subset_idx.vector[i]*dim_features+j]);

        /* not necessary since feature matrix is in memory. for documentation */
        features->free_feature_vector(vec, i);
    }

    /* remove features subset */
    SG_SPRINT("\n\n-------------------\n"
            "removing subset from features\n"
            "-------------------\n");
    features->remove_all_subsets();

    /* check and output */
    ASSERT(features->get_num_vectors()==num_vectors);
    SG_SPRINT("features->get_num_vectors(): %d\n",
            features->get_num_vectors());

    /* check get_transposed method */
    SG_SPRINT("checking transpose...");
    check_transposed(features);
    SG_SPRINT("does work\n");

    for (index_t i=0; i<features->get_num_vectors(); ++i)
    {
        SGVector<int32_t> vec=features->get_feature_vector(i);

        SG_SPRINT("vector %d: ", i);
        SGVector<int32_t>::display_vector(vec.vector, vec.vlen);

        for (index_t j=0; j<dim_features; ++j)
            ASSERT(vec.vector[j]==data.matrix[i*dim_features+j]);

        /* not necessary since feature matrix is in memory. for documentation */
        features->free_feature_vector(vec, i);
    }

    SG_UNREF(features);
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    test();

    exit_shogun();
    return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/features/SubsetStack.h> using namespace shogun; void test() { CSubsetStack* stack=new CSubsetStack(); SG_REF(stack); /* subset indices, each set is shifted by one */ SGVector<index_t> subset_a(10); SGVector<index_t> subset_b(4); subset_a.range_fill(1); subset_b.range_fill(1); /* add and remove subsets a couple of times */ stack->add_subset(subset_a); stack->remove_subset(); stack->add_subset(subset_b); stack->remove_subset(); /* add and remove subsets a couple of times, different order */ stack->add_subset(subset_a); stack->add_subset(subset_b); stack->remove_subset(); stack->remove_subset(); /** add two subsets and check if index mapping works */ stack->add_subset(subset_a); stack->add_subset(subset_b); /* remember, offset of one for each index set */ for (index_t i=0; i<subset_b.vlen; ++i) ASSERT(stack->subset_idx_conversion(i)==i+2); stack->remove_subset(); stack->remove_subset(); /* clean up */ SG_UNREF(stack); } int main(int argc, char **argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
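A minimal, self-contained sketch of the same index stacking on an actual feature object (all calls as used in the other subset examples on this page; the concrete sizes are illustrative only). With two chained subsets, each dropping the first element of the current view, vector 0 of the final view is vector 2 of the original data:

    #include <shogun/base/init.h>
    #include <shogun/features/DenseFeatures.h>

    using namespace shogun;

    int main()
    {
        init_shogun_with_defaults();

        /* 10 vectors of dimension 2, entries 1..20 */
        SGMatrix<float64_t> data(2, 10);
        SGVector<float64_t>::range_fill_vector(data.matrix, 20, 1.0);
        CDenseFeatures<float64_t>* f=new CDenseFeatures<float64_t>(data);
        SG_REF(f);

        /* chain two subsets, each shifted by one */
        SGVector<index_t> subset_a(9);
        SGVector<index_t> subset_b(8);
        subset_a.range_fill(1);
        subset_b.range_fill(1);
        f->add_subset(subset_a);
        f->add_subset(subset_b);

        /* the view now has 8 vectors; view vector 0 is original vector 2 */
        ASSERT(f->get_num_vectors()==8);

        f->remove_all_subsets();
        ASSERT(f->get_num_vectors()==10);

        SG_UNREF(f);
        exit_shogun();
        return 0;
    }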
#include <shogun/base/init.h> #include <shogun/features/hashed/HashedDenseFeatures.h> #include <shogun/features/hashed/HashedSparseFeatures.h> #include <shogun/mathematics/Math.h> #include <shogun/kernel/PolyKernel.h> using namespace shogun; int main() { init_shogun_with_defaults(); int32_t num_vectors = 5; int32_t dim = 20; SGMatrix<int32_t> mat(dim, num_vectors); for (index_t v=0; v<num_vectors; v++) { for (index_t d=0; d<dim; d++) mat(d,v) = CMath::random(-dim, dim); } int32_t hashing_dim = 12; CHashedDenseFeatures<int32_t>* h_dense_feats = new CHashedDenseFeatures<int32_t>(mat, hashing_dim); CSparseFeatures<int32_t>* sparse_feats = new CSparseFeatures<int32_t>(mat); CHashedSparseFeatures<int32_t>* h_sparse_feats = new CHashedSparseFeatures<int32_t>(sparse_feats, hashing_dim); SG_REF(h_dense_feats); CPolyKernel* kernel = new CPolyKernel(h_dense_feats, h_dense_feats, 1, false); SGMatrix<float64_t> dense_mt = kernel->get_kernel_matrix(); SG_UNREF(kernel); SG_REF(h_sparse_feats); kernel = new CPolyKernel(h_sparse_feats, h_sparse_feats, 1, false); SGMatrix<float64_t> sparse_mt = kernel->get_kernel_matrix(); SG_UNREF(kernel); for (index_t i=0; i<dense_mt.num_rows; i++) { for (index_t j=0; j<dense_mt.num_cols; j++) ASSERT(dense_mt(i,j)==sparse_mt(i,j)) } dense_mt.display_matrix("Dense matrix"); sparse_mt.display_matrix("Sparse matrix"); SG_UNREF(h_dense_feats); SG_UNREF(h_sparse_feats); exit_shogun(); }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2014 Jiaolong Xu
 * Copyright (C) 2014 Jiaolong Xu
 */

#include <shogun/io/LibSVMFile.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/base/DynArray.h>
#include <shogun/base/init.h>

using namespace shogun;

#define SHOW_DATA

/* file data */
const char fname_svm_multilabel[] =
        "../../../../data/multilabel/yeast_test.svm";

void test_libsvmfile_multilabel(const char* fname)
{
    /* check that the file exists before handing it to CLibSVMFile */
    FILE* pfile = fopen(fname, "r");
    if (pfile == NULL)
    {
        SG_SPRINT("Unable to open file: %s\n", fname);
        return;
    }
    fclose(pfile);

    /* read sparse data from the LibSVM format file */
    CLibSVMFile* svmfile = new CLibSVMFile(fname);
    SGSparseVector<float64_t>* feats;
    SGVector<float64_t>* labels;
    int32_t dim_feat;
    int32_t num_samples;
    int32_t num_classes;

    svmfile->get_sparse_matrix(feats, dim_feat, num_samples, labels,
            num_classes);

#ifdef SHOW_DATA
    /* display the labels */
    for (int32_t i = 0; i < num_samples; i++)
        labels[i].display_vector();
#endif

    SG_SPRINT("Number of samples: %d\n", num_samples);
    SG_SPRINT("Dimension of the features: %d\n", dim_feat);
    SG_SPRINT("Number of classes: %d\n", num_classes);

    SG_UNREF(svmfile);
    SG_FREE(feats);
    SG_FREE(labels);
}

int main(int argc, char ** argv)
{
    init_shogun_with_defaults();

    sg_io->set_loglevel(MSG_DEBUG);

    test_libsvmfile_multilabel(fname_svm_multilabel);

    exit_shogun();
    return 0;
}
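Since get_sparse_matrix returns a plain array of SGSparseVectors, single entries can be inspected just like in the sparse-feature examples above. A small sketch, assuming `feats` and `num_samples` inside test_libsvmfile_multilabel, before the arrays are freed:

    /* print the nonzero entries of the first sample as index:value pairs */
    if (num_samples > 0)
    {
        for (int32_t j = 0; j < feats[0].num_feat_entries; ++j)
        {
            SG_SPRINT("%d:%f ", feats[0].features[j].feat_index,
                    feats[0].features[j].entry);
        }
        SG_SPRINT("\n");
    }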
#include <shogun/base/init.h> #include <shogun/io/LineReader.h> #include <shogun/lib/DelimiterTokenizer.h> #include <shogun/lib/SGVector.h> #include <shogun/io/SGIO.h> #include <cstdio> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); FILE* fin=fopen("io_linereader.cpp", "r"); CDelimiterTokenizer* tokenizer=new CDelimiterTokenizer(); tokenizer->delimiters['\n']=1; SG_REF(tokenizer); CLineReader* reader=new CLineReader(fin, tokenizer); int lines_count=0; SGVector<char> tmp_string; while (reader->has_next()) { tmp_string=reader->read_line(); SG_SPRINT("%d %d ", lines_count, tmp_string.vlen); for (int i=0; i<tmp_string.vlen; i++) SG_SPRINT("%c", tmp_string.vector[i]); SG_SPRINT("\n"); lines_count++; } SG_SPRINT("total lines: %d\n", lines_count); tmp_string=SGVector<char>(); SG_UNREF(reader); SG_UNREF(tokenizer); fclose(fin); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DataGenerator.h> #include <shogun/features/IndexFeatures.h> #include <shogun/mathematics/Math.h> using namespace shogun; void test_custom_kernel_subsets() { /* create some data */ index_t m=10; CFeatures* features= new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data( m, 2, 1)); SG_REF(features); /* create a custom kernel */ CKernel* k=new CGaussianKernel(); k->init(features, features); CCustomKernel* l=new CCustomKernel(k); /* create a random permutation */ SGVector<index_t> subset(m); for (index_t run=0; run<100; ++run) { subset.range_fill(); CMath::permute(subset); // subset.display_vector("permutation"); features->add_subset(subset); k->init(features, features); l->add_row_subset(subset); l->add_col_subset(subset); // k->get_kernel_matrix().display_matrix("K"); // l->get_kernel_matrix().display_matrix("L"); for (index_t i=0; i<m; ++i) { for (index_t j=0; j<m; ++j) { SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j, l->kernel(i, j)); ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8); } } features->remove_subset(); l->remove_row_subset(); l->remove_col_subset(); } SG_UNREF(k); SG_UNREF(l); SG_UNREF(features); } int main(int argc, char** argv) { init_shogun_with_defaults(); //sg_io->set_loglevel(MSG_DEBUG); test_custom_kernel_subsets(); exit_shogun(); return 0; }
/* * Copyright (c) The Shogun Machine Learning Toolbox * Written (w) 2014 pl8787 * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * The views and conclusions contained in the software and documentation are those * of the authors and should not be interpreted as representing official policies, * either expressed or implied, of the Shogun Development Team. */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DataGenerator.h> #include <shogun/features/IndexFeatures.h> using namespace shogun; void test_custom_kernel_index_subsets() { /* create some data */ index_t m=10; index_t num_sub_row=3; index_t num_sub_col=2; CFeatures* features= new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data( m, 2, 1)); SG_REF(features); /* create a custom kernel */ CGaussianKernel* gaussian_kernel=new CGaussianKernel(2,10); gaussian_kernel->init(features, features); CCustomKernel* custom_kernel=new CCustomKernel(gaussian_kernel); /* create random permutations */ SGVector<index_t> row_subset(num_sub_row); SGVector<index_t> col_subset(num_sub_col); row_subset.range_fill(); CMath::permute(row_subset); col_subset.range_fill(); CMath::permute(col_subset); /* create index features */ CIndexFeatures* row_idx_feat=new CIndexFeatures(row_subset); CIndexFeatures* col_idx_feat=new CIndexFeatures(col_subset); SG_REF(row_idx_feat); SG_REF(col_idx_feat); custom_kernel->init(row_idx_feat, col_idx_feat); SGMatrix<float64_t> gaussian_kernel_matrix= gaussian_kernel->get_kernel_matrix(); SGMatrix<float64_t> custom_kernel_matrix= custom_kernel->get_kernel_matrix(); custom_kernel_matrix.display_matrix("subset"); SG_UNREF(gaussian_kernel); SG_UNREF(custom_kernel); SG_UNREF(row_idx_feat); SG_UNREF(col_idx_feat); SG_UNREF(features); } int main(int argc, char** argv) { init_shogun_with_defaults(); test_custom_kernel_index_subsets(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DataGenerator.h> using namespace shogun; void test_custom_kernel_subsets() { /* create some data */ index_t m=10; CFeatures* features= new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data( m, 2, 1)); SG_REF(features); /* create a custom kernel */ CKernel* k=new CGaussianKernel(); k->init(features, features); CCustomKernel* l=new CCustomKernel(k); /* create a random permutation */ SGVector<index_t> subset(m); for (index_t run=0; run<100; ++run) { subset.range_fill(); CMath::permute(subset); // subset.display_vector("permutation"); features->add_subset(subset); k->init(features, features); l->add_row_subset(subset); l->add_col_subset(subset); // k->get_kernel_matrix().display_matrix("K"); // l->get_kernel_matrix().display_matrix("L"); for (index_t i=0; i<m; ++i) { for (index_t j=0; j<m; ++j) { SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j, l->kernel(i, j)); ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8); } } features->remove_subset(); l->remove_row_subset(); l->remove_col_subset(); } SG_UNREF(k); SG_UNREF(l); SG_UNREF(features); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_custom_kernel_subsets(); exit_shogun(); return 0; }
#include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> #include <stdio.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message); // create some data SGMatrix<float64_t> matrix(2,3); for (int32_t i=0; i<6; i++) matrix.matrix[i]=i; // create three 2-dimensional vectors // shogun will now own the matrix created CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix); // create gaussian kernel with cache 10MB, width 0.5 CGaussianKernel* kernel = new CGaussianKernel(features, features, 0.5, 10); // print kernel matrix for (int32_t i=0; i<3; i++) { for (int32_t j=0; j<3; j++) { SG_SPRINT("%f ", kernel->kernel(i,j)); } SG_SPRINT("\n"); } // free up memory SG_UNREF(kernel); exit_shogun(); return 0; }
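The manual double loop above is equivalent to fetching the whole kernel matrix at once; a one-line alternative, assuming `kernel` from the example is still in scope:

    /* fetch and print the full 3x3 kernel matrix in one go */
    SGMatrix<float64_t> K=kernel->get_kernel_matrix();
    SGMatrix<float64_t>::display_matrix(K.matrix, K.num_rows, K.num_cols, "K");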
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

void test()
{
    /* data matrix dimensions */
    index_t num_vectors=6;
    index_t num_features=2;

    /* data means -10, 10 in all components, small std deviation */
    SGVector<float64_t> mean_1(num_features);
    SGVector<float64_t> mean_2(num_features);
    SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -10.0);
    SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 10.0);
    float64_t sigma=0.5;

    SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
    SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2");

    /* fill data matrix around mean */
    SGMatrix<float64_t> train_dat(num_features, num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
    {
        for (index_t j=0; j<num_features; ++j)
        {
            float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
            train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
        }
    }
    SGMatrix<float64_t>::display_matrix(train_dat.matrix, train_dat.num_rows,
            train_dat.num_cols, "training data");

    /* training features */
    CDenseFeatures<float64_t>* features=
            new CDenseFeatures<float64_t>(train_dat);
    SG_REF(features);

    /* training labels +/- 1 for each cluster */
    SGVector<float64_t> lab(num_vectors);
    for (index_t i=0; i<num_vectors; ++i)
        lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;

    SGVector<float64_t>::display_vector(lab.vector, lab.vlen,
            "training labels");

    CBinaryLabels* labels=new CBinaryLabels(lab);
    SG_REF(labels);

    /* evaluation instance */
    CContingencyTableEvaluation* eval=
            new CContingencyTableEvaluation(ACCURACY);

    /* kernel */
    CKernel* kernel=new CLinearKernel();
    kernel->init(features, features);

    /* create svm via libsvm */
    float64_t svm_C=10;
    float64_t svm_eps=0.0001;
    CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
    svm->set_epsilon(svm_eps);

    /* now train a few times on different subsets of the data and assert that
     * results are correct (data is linearly separable) */

    svm->data_lock(labels, features);

    SGVector<index_t> indices(5);
    indices.vector[0]=1;
    indices.vector[1]=2;
    indices.vector[2]=3;
    indices.vector[3]=4;
    indices.vector[4]=5;
    SGVector<index_t>::display_vector(indices.vector, indices.vlen,
            "training indices");
    svm->train_locked(indices);
    CBinaryLabels* output=CLabelsFactory::to_binary(svm->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
            output->get_num_labels(), "apply() output");
    SGVector<float64_t>::display_vector(labels->get_labels().vector,
            labels->get_labels().vlen, "training labels");
    SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
    ASSERT(eval->evaluate(output, labels)==1);
    SG_UNREF(output);

    SG_SPRINT("\n\n");
    indices=SGVector<index_t>(3);
    indices.vector[0]=1;
    indices.vector[1]=2;
    indices.vector[2]=3;
    SGVector<index_t>::display_vector(indices.vector, indices.vlen,
            "training indices");
    /* retrain on the smaller subset */
    svm->train_locked(indices);
    output=CLabelsFactory::to_binary(svm->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
            output->get_num_labels(), "apply() output");
    SGVector<float64_t>::display_vector(labels->get_labels().vector,
            labels->get_labels().vlen, "training labels");
    SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
    ASSERT(eval->evaluate(output, labels)==1);
    SG_UNREF(output);

    SG_SPRINT("\n\n");
    indices=SGVector<index_t>(4);
    indices.range_fill();
    SGVector<index_t>::display_vector(indices.vector, indices.vlen,
            "training indices");
    svm->train_locked(indices);
    output=CLabelsFactory::to_binary(svm->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
            output->get_num_labels(), "apply() output");
    SGVector<float64_t>::display_vector(labels->get_labels().vector,
            labels->get_labels().vlen, "training labels");
    SG_SPRINT("accuracy: %f\n", eval->evaluate(output, labels));
    ASSERT(eval->evaluate(output, labels)==1);
    SG_UNREF(output);

    SG_SPRINT("normal train\n");
    svm->data_unlock();
    svm->train();
    output=CLabelsFactory::to_binary(svm->apply());
    ASSERT(eval->evaluate(output, labels)==1);
    SGVector<float64_t>::display_vector(output->get_labels().vector,
            output->get_num_labels(), "output");
    SGVector<float64_t>::display_vector(labels->get_labels().vector,
            labels->get_labels().vlen, "training labels");
    SG_UNREF(output);

    /* clean up */
    SG_UNREF(svm);
    SG_UNREF(features);
    SG_UNREF(eval);
    SG_UNREF(labels);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test();

    exit_shogun();
    return 0;
}
#include <shogun/features/DenseFeatures.h> #include <shogun/kernel/DotKernel.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> #include <stdio.h> using namespace shogun; class CReverseLinearKernel : public CDotKernel { public: /** default constructor */ CReverseLinearKernel() : CDotKernel(0) { } /** destructor */ virtual ~CReverseLinearKernel() { } /** initialize kernel * * @param l features of left-hand side * @param r features of right-hand side * @return if initializing was successful */ virtual bool init(CFeatures* l, CFeatures* r) { CDotKernel::init(l, r); return init_normalizer(); } /** load kernel init_data * * @param src file to load from * @return if loading was successful */ virtual bool load_init(FILE* src) { return false; } /** save kernel init_data * * @param dest file to save to * @return if saving was successful */ virtual bool save_init(FILE* dest) { return false; } /** return what type of kernel we are * * @return kernel type UNKNOWN (as it is not part * officially part of shogun) */ virtual EKernelType get_kernel_type() { return K_UNKNOWN; } /** return the kernel's name * * @return name "Reverse Linear" */ inline virtual const char* get_name() const { return "ReverseLinear"; } protected: /** compute kernel function for features a and b * idx_{a,b} denote the index of the feature vectors * in the corresponding feature object * * @param idx_a index a * @param idx_b index b * @return computed kernel function at indices a,b */ virtual float64_t compute(int32_t idx_a, int32_t idx_b) { int32_t alen, blen; bool afree, bfree; float64_t* avec= ((CDenseFeatures<float64_t>*) lhs)->get_feature_vector(idx_a, alen, afree); float64_t* bvec= ((CDenseFeatures<float64_t>*) rhs)->get_feature_vector(idx_b, blen, bfree); ASSERT(alen==blen); float64_t result=0; for (int32_t i=0; i<alen; i++) result+=avec[i]*bvec[alen-i-1]; ((CDenseFeatures<float64_t>*) lhs)->free_feature_vector(avec, idx_a, afree); ((CDenseFeatures<float64_t>*) rhs)->free_feature_vector(bvec, idx_b, bfree); return result; } }; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message); // create some data SGMatrix<float64_t> matrix(2,3); for (int32_t i=0; i<6; i++) matrix.matrix[i]=i; // create three 2-dimensional vectors // shogun will now own the matrix created CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(); features->set_feature_matrix(matrix); // create reverse linear kernel CReverseLinearKernel* kernel = new CReverseLinearKernel(); kernel->init(features,features); // print kernel matrix for (int32_t i=0; i<3; i++) { for (int32_t j=0; j<3; j++) SG_SPRINT("%f ", kernel->kernel(i,j)); SG_SPRINT("\n"); } // free up memory SG_UNREF(kernel); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/labels/BinaryLabels.h> using namespace shogun; void test_sigmoid_fitting() { CBinaryLabels* labels=new CBinaryLabels(10); labels->set_values(SGVector<float64_t>(labels->get_num_labels())); for (index_t i=0; i<labels->get_num_labels(); ++i) labels->set_value(i%2==0 ? 1 : -1, i); labels->get_values().display_vector("scores"); labels->scores_to_probabilities(); labels->get_values().display_vector("probabilities"); SG_UNREF(labels); } int main() { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_sigmoid_fitting(); exit_shogun(); return 0; }
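After scores_to_probabilities the stored values should be valid probabilities; a small sanity-check sketch, assuming `labels` from the example before it is unreferenced:

    /* sanity check: calibrated values are valid probabilities in [0,1] */
    for (index_t i=0; i<labels->get_num_labels(); ++i)
    {
        float64_t p=labels->get_value(i);
        ASSERT(p>=0 && p<=1);
    }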
#include <shogun/base/init.h> #include <shogun/lib/CircularBuffer.h> #include <shogun/lib/DelimiterTokenizer.h> #include <shogun/lib/SGVector.h> #include <shogun/io/SGIO.h> #include <cstdio> #include <cstring> using namespace shogun; const int max_line_length = 256; int main(int argc, char** argv) { init_shogun_with_defaults(); SGVector<char> test_string(const_cast<char* >("all your bayes are belong to us! "), 33, false); CCircularBuffer* buffer=new CCircularBuffer(max_line_length); CDelimiterTokenizer* tokenizer=new CDelimiterTokenizer(); tokenizer->delimiters[' ']=1; SG_REF(tokenizer); buffer->set_tokenizer(tokenizer); SGVector<char> tmp_string; buffer->push(test_string); int num_read; index_t start; while ((num_read=buffer->next_token_idx(start))>0) { buffer->skip_characters(start); tmp_string=buffer->pop(num_read); buffer->skip_characters(1); for (int i=0; i<tmp_string.vlen; i++) SG_SPRINT("%c", tmp_string.vector[i]); SG_SPRINT("\n"); } SG_UNREF(buffer); SG_UNREF(tokenizer); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2009 Soeren Sonnenburg * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/io/SGIO.h> #include <shogun/lib/Time.h> #include <shogun/lib/ShogunException.h> #include <shogun/mathematics/Math.h> #include <shogun/lib/DynInt.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_warning(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } void gen_ints(uint256_t* &a, uint32_t* &b, uint32_t len) { a=SG_MALLOC(uint256_t, len); b=SG_MALLOC(uint32_t, len); CMath::init_random(17); for (uint32_t i=0; i<len; i++) { uint64_t r[4]={(uint64_t) CMath::random() << 32 | CMath::random(), (uint64_t) CMath::random() << 32 | CMath::random(), (uint64_t) CMath::random() << 32 | CMath::random(), (uint64_t) CMath::random() << 32 | CMath::random()}; a[len-i-1]=r; b[len-i-1]=i; } } const int LEN = 5*1024; int main() { init_shogun(&print_message, &print_warning, &print_error); try { uint256_t* a; uint32_t* b; CTime t; t.io->set_loglevel(MSG_DEBUG); SG_SPRINT("gen data.."); t.start(); gen_ints(a,b, LEN); t.cur_time_diff(true); SG_SPRINT("qsort.."); t.start(); CMath::qsort_index(a, b, LEN); t.cur_time_diff(true); SG_SPRINT("\n\n"); for (uint32_t i=0; i<10; i++) { SG_SPRINT("a[%d]=", i); a[i].print_hex(); SG_SPRINT("\n"); } SG_SPRINT("\n\n"); uint64_t val1[4]={1,2,3,4}; uint64_t val2[4]={5,6,7,8}; a[0]=val1; a[1]=val2; a[2]=a[0]; CMath::swap(a[0],a[1]); printf("a[0]==a[1] %d\n", (int) (a[0] == a[1])); printf("a[0]<a[1] %d\n", (int) (a[0] < a[1])); printf("a[0]<=a[1] %d\n", (int) (a[0] <= a[1])); printf("a[0]>a[1] %d\n", (int) (a[0] > a[1])); printf("a[0]>=a[1] %d\n", (int) (a[0] >= a[1])); printf("a[0]==a[0] %d\n", (int) (a[0] == a[0])); printf("a[0]<a[0] %d\n", (int) (a[0] < a[0])); printf("a[0]<=a[0] %d\n", (int) (a[0] <= a[0])); printf("a[0]>a[0] %d\n", (int) (a[0] > a[0])); printf("a[0]>=a[0] %d\n", (int) (a[0] >= a[0])); SG_SPRINT("\n\n"); for (uint32_t i=0; i<10 ; i++) { SG_SPRINT("a[%d]=", i); a[i].print_hex(); printf("\n"); } SG_FREE(a); SG_FREE(b); } catch(ShogunException & sh) { SG_SPRINT("%s",sh.get_exception_string()); } exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2009 Soeren Sonnenburg * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/io/SGIO.h> #include <shogun/lib/common.h> #include <shogun/lib/SGVector.h> #include <shogun/base/DynArray.h> using namespace shogun; int main() { init_shogun_with_defaults(); DynArray<int32_t> values; for (int32_t i=0; i<1000; i++) { values.set_element(i,i); } for (int32_t i=0; i<1000; i++) { SG_SPRINT("values[%i]=%i\n", i, values[i]); } DynArray<SGVector<float64_t> > vectors(5); for (int32_t i=0; i<20; i++) { SG_SPRINT("%i\n", i); SGVector<float64_t> vec(i); for (int32_t j=0; j<i; j++) vec.vector[j]=j; vectors.set_element(vec,i); } for (int32_t i=0; i<20; i++) { SG_SPRINT("%i\n", i); vectors[i].display_vector(); } exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/GCArray.h> #include <shogun/kernel/Kernel.h> #include <shogun/kernel/GaussianKernel.h> #include <stdio.h> using namespace shogun; const int l=10; int main(int argc, char** argv) { init_shogun(); // we need this scope, because exit_shogun() must not be called // before the destructor of CGCArray<CKernel*> kernels! { // create array of kernels CGCArray<CKernel*> kernels(l); // fill array with kernels for (int i=0; i<l; i++) kernels.set(new CGaussianKernel(10, 1.0), i); // print kernels for (int i=0; i<l; i++) { CKernel* kernel = kernels.get(i); printf("kernels[%d]=%p\n", i, kernel); SG_UNREF(kernel); } } exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/lib/Hash.h> #include <stdio.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); uint8_t array[4]={0,1,2,3}; printf("hash(0)=%0x\n", CHash::MurmurHash3(&array[0], 1, 0xDEADBEAF)); printf("hash(1)=%0x\n", CHash::MurmurHash3(&array[1], 1, 0xDEADBEAF)); printf("hash(2)=%0x\n", CHash::MurmurHash3(&array[0], 2, 0xDEADBEAF)); printf("hash(3)=%0x\n", CHash::MurmurHash3(&array[0], 4, 0xDEADBEAF)); uint32_t h = 0xDEADBEAF; uint32_t carry = 0; CHash::IncrementalMurmurHash3(&h, &carry, &array[0], 1); printf("inc_hash(0)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[1], 1); printf("inc_hash(1)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[2], 1); printf("inc_hash(2)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[3], 1); printf("inc_hash(3)=%0x\n", h); h = CHash::FinalizeIncrementalMurmurHash3(h, carry, 4); printf("Final inc_hash(3)=%0x\n", h); exit_shogun(); return 0; }
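The printed values are meant to be compared by eye; if the incremental variant is consistent with the one-shot computation, as the output is intended to show, the following check (assuming `h` and `array` from the example, after finalization) should hold:

    /* the finalized incremental hash should match hashing all 4 bytes at once */
    ASSERT(h == CHash::MurmurHash3(&array[0], 4, 0xDEADBEAF));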
#include <shogun/lib/config.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/SGMatrix.h> #include <shogun/io/HDF5File.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); #ifdef HAVE_HDF5 CHDF5File* hdf = new CHDF5File((char*) "../data/australian.libsvm.h5",'r', "/data/data"); float64_t* mat; int32_t num_feat; int32_t num_vec; hdf->get_matrix(mat, num_feat, num_vec); SGMatrix<float64_t>::display_matrix(mat, num_feat, num_vec); SG_FREE(mat); SG_UNREF(hdf); #endif exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/memory.h> #include <shogun/lib/IndirectObject.h> #include <shogun/mathematics/Math.h> #include <shogun/base/SGObject.h> #include <stdio.h> using namespace shogun; const int l=10; int main(int argc, char** argv) { init_shogun(); // create array a int32_t* a=SG_MALLOC(int32_t, l); for (int i=0; i<l; i++) a[i]=l-i; typedef CIndirectObject<int32_t, int32_t**> INDIRECT; // create array of indirect objects pointing to array a INDIRECT::set_array(&a); INDIRECT* x = SG_MALLOC(INDIRECT, l); INDIRECT::init_slice(x, l); printf("created array a and indirect object array x pointing to a.\n\n"); for (int i=0; i<l; i++) printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i])); //sort the array CMath::qsort(x, l); printf("\n\nvoila! sorted indirect object array x, keeping a const.\n\n"); for (int i=0; i<l; i++) printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i])); SG_FREE(x); SG_FREE(a); exit_shogun(); return 0; }
#include <shogun/lib/Map.h> #include <shogun/io/SGIO.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> using namespace shogun; #define SIZE 6 void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message, &print_message, &print_message); const char* v[SIZE] = {"Russia", "England", "Germany", "USA", "France", "Spain"}; CMap<int32_t, const char*>* map = new CMap<int32_t, const char*>(SIZE/2, SIZE/2); for (int i=0; i<SIZE; i++) map->add(i, v[i]); map->remove(0); //SG_SPRINT("Num of elements: %d\n", map->get_num_elements()); for (int i=0; i<SIZE; i++) { if (map->contains(i)) ; //SG_SPRINT("key %d contains in map with index %d and data=%s\n", // i, map->index_of(i), map->get_element(i)); } SG_UNREF(map); exit_shogun(); return 0; }
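Element access mirrors the commented-out prints; a small sketch, assuming `map` from the example before SG_UNREF(map):

    /* look up a key that was not removed */
    if (map->contains(3))
    {
        SG_SPRINT("key 3 maps to \"%s\" (internal index %d)\n",
                map->get_element(3), map->index_of(3));
    }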
#include <shogun/lib/config.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/SGMatrix.h> #include <shogun/io/MLDataHDF5File.h> #include <shogun/io/SGIO.h> #include <shogun/mathematics/Math.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); #if defined(HAVE_HDF5) && defined( HAVE_CURL) CMLDataHDF5File* hdf = NULL; try { hdf = new CMLDataHDF5File((char *)"australian", "/data/data"); } catch (ShogunException& e) { SG_UNREF(hdf); exit_shogun(); return 0; } float64_t* mat=NULL; int32_t num_feat; int32_t num_vec; try { hdf->get_matrix(mat, num_feat, num_vec); SGMatrix<float64_t>::display_matrix(mat, num_feat, num_vec); } catch (ShogunException& e) { SG_SWARNING("%s", e.get_exception_string()); } SG_FREE(mat); SG_UNREF(hdf); #endif // HAVE_CURL && HAVE_HDF5 exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/io/SGIO.h> #include <shogun/lib/SGMatrix.h> #include <shogun/io/SerializableAsciiFile.h> #include <shogun/features/SparseFeatures.h> using namespace shogun; int main(int argc, char** argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); /* create feature data matrix */ SGMatrix<int32_t> data(3, 20); /* fill matrix with random data */ for (index_t i=0; i<20*3; ++i) { if (i%2==0) data.matrix[i]=0; else data.matrix[i]=CMath::random(1, 9); } /* create sparse features */ CSparseFeatures<int32_t>* sparse_features=new CSparseFeatures<int32_t>(data); CSerializableAsciiFile* file; file=new CSerializableAsciiFile("sparseFeatures.txt", 'w'); sparse_features->save_serializable(file); file->close(); SG_UNREF(file); /* this will fail with a warning, same with CSerializableHdf5File and xml serialization*/ CSparseFeatures<int32_t>* sparse_features_loaded = new CSparseFeatures<int32_t>(); file = new CSerializableAsciiFile("sparseFeatures.txt", 'r'); sparse_features_loaded->load_serializable(file); SG_UNREF(file); SG_UNREF(sparse_features_loaded); SG_UNREF(sparse_features); exit_shogun(); }
#include <shogun/lib/Set.h> #include <shogun/io/SGIO.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> using namespace shogun; #define SIZE 8 void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message, &print_message, &print_message); double v[SIZE] = {0.0,0.1,0.2,0.2,0.3,0.4,0.5,0.5}; CSet<double>* set = new CSet<double>(SIZE/2, SIZE/2); for (int i=0; i<SIZE; i++) set->add(v[i]); set->remove(0.2); //SG_SPRINT("Num of elements: %d\n", set->get_num_elements()); for (int i=0; i<SIZE; i++) { if (set->contains(v[i])) ; //SG_SPRINT("%lg contains in set with index %d\n", v[i], set->index_of(v[i])); } SG_UNREF(set); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/mathematics/Statistics.h> #include <shogun/mathematics/Math.h> #include <shogun/lib/SGVector.h> using namespace shogun; void test() { /* SGVector<float64_t> data(10); SGVector<float64_t>::range_fill_vector(data.vector, data.vlen, 1.0); float64_t low, up, mean; float64_t error_prob=0.05; mean=CStatistics::confidence_intervals_mean(data, error_prob, low, up); SG_SPRINT("sample mean: %f. True mean lies in [%f,%f] with %f%%\n", mean, low, up, 100*(1-error_prob)); */ } int main(int argc, char **argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/lapack.h>

using namespace shogun;

#ifdef HAVE_LAPACK

bool is_equal(float64_t a, float64_t b, float64_t eps)
{
    return CMath::abs(a-b)<=eps;
}

void test_ev()
{
    /* symmetric tridiagonal matrix with zero diagonal */
    SGMatrix<float64_t> A(3,3);
    A(0,0)=0; A(0,1)=1; A(0,2)=0;
    A(1,0)=1; A(1,1)=0; A(1,2)=1;
    A(2,0)=0; A(2,1)=1; A(2,2)=0;

    SGVector<float64_t> ev=SGMatrix<float64_t>::compute_eigenvectors(A);
    SGMatrix<float64_t>::display_matrix(A.matrix, A.num_rows, A.num_cols, "A");
    SGVector<float64_t>::display_vector(ev.vector, ev.vlen, "eigenvalues");

    float64_t sqrt22=CMath::sqrt(2.0)/2.0;
    float64_t eps=10E-16;

    /* check for correct eigenvectors */
    ASSERT(is_equal(A(0,0), 0.5, eps));
    ASSERT(is_equal(A(0,1), -sqrt22, eps));
    ASSERT(is_equal(A(0,2), 0.5, eps));
    ASSERT(is_equal(A(1,0), -sqrt22, eps));
    ASSERT(is_equal(A(1,1), 0, eps));
    ASSERT(is_equal(A(1,2), sqrt22, eps));
    ASSERT(is_equal(A(2,0), 0.5, eps));
    ASSERT(is_equal(A(2,1), sqrt22, eps));
    ASSERT(is_equal(A(2,2), 0.5, eps));

    /* check for correct eigenvalues */
    ASSERT(is_equal(ev[0], -sqrt22*2, eps));
    ASSERT(is_equal(ev[1], 0, eps));
    ASSERT(is_equal(ev[2], sqrt22*2, eps));
}

void test_matrix_multiply()
{
    index_t n=10;
    SGMatrix<float64_t> I=SGMatrix<float64_t>::create_identity_matrix(n,1.0);

    index_t m=4;
    SGMatrix<float64_t> A(n, m);
    SGVector<float64_t>::range_fill_vector(A.matrix, m*n);
    SGMatrix<float64_t>::display_matrix(I, "I");
    SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
    SGMatrix<float64_t>::display_matrix(A, "A transposed");
    SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
    SGMatrix<float64_t>::display_matrix(A, "A");

    SG_SPRINT("multiply A by I and check result\n");
    SGMatrix<float64_t> A2=SGMatrix<float64_t>::matrix_multiply(I, A);
    ASSERT(A2.num_rows==A.num_rows);
    ASSERT(A2.num_cols==A.num_cols);
    SGMatrix<float64_t>::display_matrix(A2);
    for (index_t i=0; i<A2.num_rows; ++i)
    {
        for (index_t j=0; j<A2.num_cols; ++j)
            ASSERT(A(i,j)==A2(i,j));
    }

    SG_SPRINT("multiply A by transposed I and check result\n");
    SGMatrix<float64_t> A3=SGMatrix<float64_t>::matrix_multiply(I, A, true);
    ASSERT(A3.num_rows==I.num_rows);
    ASSERT(A3.num_cols==A.num_cols);
    SGMatrix<float64_t>::display_matrix(A3);
    for (index_t i=0; i<A2.num_rows; ++i)
    {
        for (index_t j=0; j<A2.num_cols; ++j)
            ASSERT(A(i,j)==A3(i,j));
    }

    SG_SPRINT("multiply transposed A by I and check result\n");
    SGMatrix<float64_t> A4=SGMatrix<float64_t>::matrix_multiply(A, I, true, false);
    ASSERT(A4.num_rows==A.num_cols);
    ASSERT(A4.num_cols==I.num_cols);
    SGMatrix<float64_t>::display_matrix(A4);
    for (index_t i=0; i<A.num_rows; ++i)
    {
        for (index_t j=0; j<A.num_cols; ++j)
            ASSERT(A(i,j)==A4(j,i));
    }

    SG_SPRINT("multiply A by scaled I and check result\n");
    SGMatrix<float64_t> A5=SGMatrix<float64_t>::matrix_multiply(I, A, false, false, n);
    ASSERT(A5.num_rows==I.num_rows);
    ASSERT(A5.num_cols==A.num_cols);
    SGMatrix<float64_t>::display_matrix(A5);
    for (index_t i=0; i<A2.num_rows; ++i)
    {
        for (index_t j=0; j<A2.num_cols; ++j)
            ASSERT(n*A(i,j)==A5(i,j));
    }
}

void test_lapack()
{
    // size of square matrix
    int N = 100;

    // square matrix
    double* double_matrix = new double[N*N];
    // for storing eigenpairs
    double* double_eigenvalues = new double[N];
    double* double_eigenvectors = new double[N*N];
    // for SVD
    double* double_U = new double[N*N];
    double* double_s = new double[N];
    double* double_Vt = new double[N*N];
    // status (should be zero)
    int status;

    // DSYGVX
    for (int i=0; i<N; i++)
    {
        for (int j=0; j<N; j++)
            double_matrix[i*N+j] = ((double)(i-j))/(i+j+1);
        double_matrix[i*N+i] += 100;
    }
    status = 0;
    wrap_dsygvx(1,'V','U',N,double_matrix,N,double_matrix,N,1,3,
            double_eigenvalues,double_eigenvectors,&status);
    if (status!=0)
        SG_SERROR("DSYGVX/SSYGVX failed with code %d\n",status);
    delete[] double_eigenvectors;

    // DGEQRF+DORGQR
    status = 0;
    double* double_tau = new double[N];
    wrap_dgeqrf(N,N,double_matrix,N,double_tau,&status);
    wrap_dorgqr(N,N,N,double_matrix,N,double_tau,&status);
    if (status!=0)
        SG_SERROR("DGEQRF/DORGQR failed with code %d\n",status);
    delete[] double_tau;

    // DGESVD
    for (int i=0; i<N; i++)
    {
        for (int j=0; j<N; j++)
            double_matrix[i*N+j] = i*i+j*j;
    }
    status = 0;
    wrap_dgesvd('A','A',N,N,double_matrix,N,double_s,double_U,N,double_Vt,N,
            &status);
    if (status!=0)
        SG_SERROR("DGESVD failed with code %d\n",status);
    delete[] double_s;
    delete[] double_U;
    delete[] double_Vt;

    // DSYEV
    status = 0;
    wrap_dsyev('V','U',N,double_matrix,N,double_eigenvalues,&status);
    if (status!=0)
        SG_SERROR("DSYEV failed with code %d\n",status);
    delete[] double_eigenvalues;
    delete[] double_matrix;
}
#endif // HAVE_LAPACK

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

#ifdef HAVE_LAPACK
    SG_SPRINT("checking lapack\n");
    test_lapack();

    SG_SPRINT("compute_eigenvectors\n");
    test_ev();

    SG_SPRINT("matrix_multiply\n");
    test_matrix_multiply();
#endif // HAVE_LAPACK

    exit_shogun();
    return 0;
}
#include <shogun/metric/LMNN.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> using namespace shogun; int main() { init_shogun_with_defaults(); // create features, each column is a feature vector SGMatrix<float64_t> feat_mat(2,4); // 1st feature vector feat_mat(0,0)=0; feat_mat(1,0)=0; // 2nd feature vector feat_mat(0,1)=0; feat_mat(1,1)=-1; // 3rd feature vector feat_mat(0,2)=1; feat_mat(1,2)=1; // 4th feature vector feat_mat(0,3)=-1; feat_mat(1,3)=1; // wrap feat_mat into Shogun features CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat_mat); // create labels SGVector<float64_t> lab_vec(4); lab_vec[0]=0; lab_vec[1]=0; lab_vec[2]=1; lab_vec[3]=1; // two-class data, use MulticlassLabels because LMNN works in general for more than two classes CMulticlassLabels* labels=new CMulticlassLabels(lab_vec); // create LMNN metric machine int32_t k=1; // number of target neighbors per example CLMNN* lmnn=new CLMNN(features,labels,k); // use the identity matrix as initial transform for LMNN SGMatrix<float64_t> init_transform=SGMatrix<float64_t>::create_identity_matrix(2,1); // set number of maximum iterations and train lmnn->set_maxiter(1500); // lmnn->io->set_loglevel(MSG_DEBUG); lmnn->train(init_transform); // lmnn->get_linear_transform().display_matrix("linear_transform"); CLMNNStatistics* statistics=lmnn->get_statistics(); /* statistics->obj.display_vector("objective"); statistics->stepsize.display_vector("stepsize"); statistics->num_impostors.display_vector("num_impostors"); */ SG_UNREF(statistics); SG_UNREF(lmnn); exit_shogun(); }
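The learned metric is a linear transform, so projecting data means multiplying the transform onto the (column-major) feature matrix. A hypothetical sketch, assuming `lmnn` and `feat_mat` from the example are still in scope; matrix_multiply needs LAPACK, as in the linear-algebra example above:

    /* project the training data: columns are examples, so projected = L * X */
    SGMatrix<float64_t> L=lmnn->get_linear_transform();
    SGMatrix<float64_t> projected=
            SGMatrix<float64_t>::matrix_multiply(L, feat_mat);
    projected.display_matrix("projected data");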
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/mathematics/Math.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c=new CModelSelectionParameters("C1"); root->append_child(c); c->build_values(1.0, 2.0, R_EXP); CGaussianKernel* gaussian_kernel=new CGaussianKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ gaussian_kernel->print_modsel_params(); CModelSelectionParameters* param_gaussian_kernel= new CModelSelectionParameters("kernel", gaussian_kernel); root->append_child(param_gaussian_kernel); CModelSelectionParameters* param_gaussian_kernel_width= new CModelSelectionParameters("log_width"); param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR); param_gaussian_kernel->append_child(param_gaussian_kernel_width); return root; } void apply_parameter_tree(CDynamicObjectArray* combinations) { /* create some data */ SGMatrix<float64_t> matrix(2,3); for (index_t i=0; i<6; i++) matrix.matrix[i]=i; /* create three 2-dimensional vectors * to avoid deleting these, REF now and UNREF when finished */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix); SG_REF(features); /* create three labels, will be handed to svm and automaticall deleted */ CBinaryLabels* labels=new CBinaryLabels(3); SG_REF(labels); labels->set_label(0, -1); labels->set_label(1, +1); labels->set_label(2, -1); /* create libsvm with C=10 and train */ CLibSVM* svm=new CLibSVM(); SG_REF(svm); svm->set_labels(labels); for (index_t i=0; i<combinations->get_num_elements(); ++i) { SG_SPRINT("applying:\n"); CParameterCombination* current_combination=(CParameterCombination*) combinations->get_element(i); current_combination->print_tree(); Parameter* current_parameters=svm->m_parameters; current_combination->apply_to_modsel_parameter(current_parameters); SG_UNREF(current_combination); /* get kernel to set features, get_kernel SG_REF's the kernel */ CKernel* kernel=svm->get_kernel(); kernel->init(features, features); svm->train(); /* classify on training examples */ for (index_t j=0; j<3; j++) SG_SPRINT("output[%d]=%f\n", j, svm->apply_one(j)); /* unset features and SG_UNREF kernel */ kernel->cleanup(); SG_UNREF(kernel); SG_SPRINT("----------------\n\n"); } /* free up memory */ SG_UNREF(features); SG_UNREF(labels); SG_UNREF(svm); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); /* create example tree */ CModelSelectionParameters* tree=create_param_tree(); tree->print_tree(); SG_SPRINT("----------------------------------\n"); /* build combinations of parameter trees */ CDynamicObjectArray* combinations=tree->get_combinations(); 
apply_parameter_tree(combinations); /* print and directly delete them all */ for (index_t i=0; i<combinations->get_num_elements(); ++i) { CParameterCombination* combination=(CParameterCombination*) combinations->get_element(i); SG_UNREF(combination); } SG_UNREF(combinations); /* delete example tree (after processing of combinations, because the CSGObject * (namely the kernel) of the tree is SG_UNREF'ed and not REF'ed anywhere * else) */ SG_UNREF(tree); exit_shogun(); return 0; }
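All examples on this page rely on Shogun's intrusive reference counting via SG_REF/SG_UNREF. A minimal self-contained sketch of the ownership pattern (the Gaussian kernel is just an arbitrary CSGObject here):

#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	/* freshly created objects start with reference count 0 */
	CGaussianKernel* kernel=new CGaussianKernel();

	/* SG_REF takes shared ownership (count becomes 1) */
	SG_REF(kernel);

	/* machines and containers SG_REF objects handed to them, so an object
	 * may legitimately outlive our own SG_UNREF */
	SG_UNREF(kernel); /* count drops to 0, kernel is deleted here */

	exit_shogun();
	return 0;
}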
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/CombinedFeatures.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/classifier/mkl/MKLClassification.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PolyKernel.h> #include <shogun/kernel/CombinedKernel.h> using namespace shogun; /** Creates a bunch of combined kernels with different sub-parameters. * This can be used for model selection of subkernel parameters of combined * kernels */ CModelSelectionParameters* build_combined_kernel_parameter_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); /* kernel a should be Gaussian with certain parameters * kernel b should be polynomial with certain parameters * This will create a list of combined kernels with all parameter combinations * All CList instances here do reference counting (as does the combine_kernels * method of CCombinedKernel) */ CList* kernels_a=new CList(true); CList* kernels_b=new CList(true); int32_t cache_size=10; kernels_a->append_element(new CGaussianKernel(cache_size, 2)); kernels_a->append_element(new CGaussianKernel(cache_size, 4)); kernels_b->append_element(new CPolyKernel(cache_size, 4)); kernels_b->append_element(new CPolyKernel(cache_size, 2)); CList* kernel_list=new CList(); kernel_list->append_element(kernels_a); kernel_list->append_element(kernels_b); CList* combinations=CCombinedKernel::combine_kernels(kernel_list); /* add all created combined kernels to parameters tree */ /* cast is safe since the above method guarantees the type */ CCombinedKernel* current=(CCombinedKernel*)(combinations->get_first_element()); SG_SPRINT("combined kernel combinations:\n"); index_t i=0; while (current) { /* print out current kernel's subkernels; kernel_a (index 0) is the * Gaussian, kernel_b (index 1) the polynomial */ SG_SPRINT("combined kernel %d:\n", i++); CGaussianKernel* gaussian=(CGaussianKernel*)current->get_kernel(0); CPolyKernel* poly=(CPolyKernel*)current->get_kernel(1); SG_SPRINT("kernel_a type: %s\n", gaussian->get_name()); SG_SPRINT("kernel_b type: %s\n", poly->get_name()); SG_SPRINT("kernel_a parameter: %f\n", gaussian->get_width()); SG_SPRINT("kernel_b parameter: %d\n", poly->get_degree()); SG_UNREF(poly); SG_UNREF(gaussian); CModelSelectionParameters* param_kernel= new CModelSelectionParameters("kernel", current); root->append_child(param_kernel); SG_UNREF(current); current=(CCombinedKernel*)(combinations->get_next_element()); } SG_UNREF(combinations); SG_UNREF(kernel_list); SG_UNREF(kernels_a); SG_UNREF(kernels_b); return root; } void modelselection_combined_kernel() { int32_t num_subsets=3; int32_t num_vectors=20; int32_t dim_vectors=3; /* create some data and labels */ SGMatrix<float64_t> matrix(dim_vectors, num_vectors); CBinaryLabels* labels=new CBinaryLabels(num_vectors); for (int32_t i=0; i<num_vectors*dim_vectors; i++) matrix.matrix[i]=CMath::randn_double(); /* create
num_feautres 2-dimensional vectors */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix); /* create combined features */ CCombinedFeatures* comb_features=new CCombinedFeatures(); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); /* create labels, two classes */ for (index_t i=0; i<num_vectors; ++i) labels->set_label(i, i%2==0 ? 1 : -1); /* create svm */ CMKL* classifier=new CMKLClassification(new CLibSVM()); classifier->set_interleaved_optimization_enabled(false); /* splitting strategy */ CStratifiedCrossValidationSplitting* splitting_strategy= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* accuracy evaluation */ CContingencyTableEvaluation* evaluation_criterium= new CContingencyTableEvaluation(ACCURACY); /* cross validation class for evaluation in model selection */ CCrossValidation* cross=new CCrossValidation(classifier, comb_features, labels, splitting_strategy, evaluation_criterium); cross->set_num_runs(1); /* TODO: remove this once locking is fixed for combined kernels */ cross->set_autolock(false); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=build_combined_kernel_parameter_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); bool print_state=true; CParameterCombination* best_combination=grid_search->select_model( print_state); best_combination->print_tree(); best_combination->apply_to_machine(classifier); /* print subkernel parameters, I know what the subkernel types are here */ CCombinedKernel* kernel=(CCombinedKernel*)classifier->get_kernel(); CGaussianKernel* gaussian=(CGaussianKernel*)kernel->get_kernel(0); CPolyKernel* poly=(CPolyKernel*)kernel->get_kernel(1); SG_SPRINT("gaussian width: %f\n", gaussian->get_width()); SG_SPRINT("poly degree: %d\n", poly->get_degree()); SG_UNREF(kernel); SG_UNREF(gaussian); SG_UNREF(poly); /* larger number of runs to have tighter confidence intervals */ cross->set_num_runs(10); // cross->set_conf_int_alpha(0.01); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("result: "); result->print_result(); /* clean up destroy result parameter */ SG_UNREF(result); SG_UNREF(best_combination); SG_UNREF(grid_search); } int main(int argc, char **argv) { init_shogun_with_defaults(); modelselection_combined_kernel(); exit_shogun(); return 0; }
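After MKL training, the interesting output besides the subkernel parameters is the learned subkernel weighting. As a sketch (assuming the trained classifier from the example above, and assuming CCombinedKernel::get_subkernel_weights() returns one weight per subkernel), the weights could be inspected like this:

/* sketch: read the learned MKL subkernel weights off the best model */
CCombinedKernel* best_kernel=(CCombinedKernel*)classifier->get_kernel();
SGVector<float64_t> weights=best_kernel->get_subkernel_weights();
weights.display_vector("subkernel_weights");
SG_UNREF(best_kernel);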
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PowerKernel.h> #include <shogun/distance/MinkowskiMetric.h> #include <shogun/mathematics/Math.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1=new CModelSelectionParameters("C1"); root->append_child(c1); c1->build_values(-1.0, 1.0, R_EXP); CModelSelectionParameters* c2=new CModelSelectionParameters("C2"); root->append_child(c2); c2->build_values(-1.0, 1.0, R_EXP); CGaussianKernel* gaussian_kernel=new CGaussianKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ gaussian_kernel->print_modsel_params(); CModelSelectionParameters* param_gaussian_kernel= new CModelSelectionParameters("kernel", gaussian_kernel); CModelSelectionParameters* gaussian_kernel_width= new CModelSelectionParameters("log_width"); gaussian_kernel_width->build_values(-CMath::log(2.0), 0.0, R_LINEAR, 1.0); param_gaussian_kernel->append_child(gaussian_kernel_width); root->append_child(param_gaussian_kernel); CPowerKernel* power_kernel=new CPowerKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ power_kernel->print_modsel_params(); CModelSelectionParameters* param_power_kernel= new CModelSelectionParameters("kernel", power_kernel); root->append_child(param_power_kernel); CModelSelectionParameters* param_power_kernel_degree= new CModelSelectionParameters("degree"); param_power_kernel_degree->build_values(1.0, 2.0, R_LINEAR); param_power_kernel->append_child(param_power_kernel_degree); CMinkowskiMetric* m_metric=new CMinkowskiMetric(10); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ m_metric->print_modsel_params(); CModelSelectionParameters* param_power_kernel_metric1= new CModelSelectionParameters("distance", m_metric); param_power_kernel->append_child(param_power_kernel_metric1); CModelSelectionParameters* param_power_kernel_metric1_k= new CModelSelectionParameters("k"); param_power_kernel_metric1_k->build_values(1.0, 2.0, R_LINEAR); param_power_kernel_metric1->append_child(param_power_kernel_metric1_k); return root; } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); int32_t num_subsets=3; int32_t num_vectors=20; int32_t dim_vectors=3; /* create some data and labels */ SGMatrix<float64_t> matrix(dim_vectors, 
num_vectors); CBinaryLabels* labels=new CBinaryLabels(num_vectors); for (int32_t i=0; i<num_vectors*dim_vectors; i++) matrix.matrix[i]=CMath::randn_double(); /* create num_vectors dim_vectors-dimensional vectors */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix); /* create labels, two classes */ for (index_t i=0; i<num_vectors; ++i) labels->set_label(i, i%2==0 ? 1 : -1); /* create svm */ CLibSVM* classifier=new CLibSVM(); /* splitting strategy */ CStratifiedCrossValidationSplitting* splitting_strategy= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* accuracy evaluation */ CContingencyTableEvaluation* evaluation_criterium= new CContingencyTableEvaluation(ACCURACY); /* cross validation class for evaluation in model selection */ CCrossValidation* cross=new CCrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterium); cross->set_num_runs(1); /* note that this is not necessary since locking is done automatically */ cross->set_autolock(true); /* print all parameters available for model selection * Don't worry if yours is not included, simply write to the mailing list */ classifier->print_modsel_params(); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=create_param_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); bool print_state=true; CParameterCombination* best_combination=grid_search->select_model( print_state); best_combination->print_tree(); best_combination->apply_to_machine(classifier); /* larger number of runs to have tighter confidence intervals */ cross->set_num_runs(10); // cross->set_conf_int_alpha(0.01); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("result: "); result->print_result(); /* now again but unlocked */ SG_UNREF(best_combination); cross->set_autolock(false); best_combination=grid_search->select_model(print_state); best_combination->apply_to_machine(classifier); SG_UNREF(result); result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("result (unlocked): "); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(best_combination); SG_UNREF(grid_search); exit_shogun(); return 0; }
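Locking precomputes the kernel matrix on all data once, so repeated cross-validation folds merely index into it instead of recomputing kernel entries. The fragment below is a sketch (identifiers taken from the example above, not part of the original) contrasting the two modes:

/* locked: kernel is precomputed once before evaluation (fast) */
cross->set_autolock(true);
CCrossValidationResult* locked=(CCrossValidationResult*)cross->evaluate();
locked->print_result();
SG_UNREF(locked);

/* unlocked: every fold trains from scratch (needed for machines that
 * do not support locking) */
cross->set_autolock(false);
CCrossValidationResult* unlocked=(CCrossValidationResult*)cross->evaluate();
unlocked->print_result();
SG_UNREF(unlocked);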
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/RegressionLabels.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PolyKernel.h> #include <shogun/regression/KernelRidgeRegression.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/CrossValidationSplitting.h> #include <shogun/evaluation/MeanSquaredError.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ParameterCombination.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* tau=new CModelSelectionParameters("tau"); root->append_child(tau); tau->build_values(-1.0, 1.0, R_EXP); CGaussianKernel* gaussian_kernel=new CGaussianKernel(); /* print all parameters available for model selection * Don't worry if yours is not included, simply write to the mailing list */ gaussian_kernel->print_modsel_params(); CModelSelectionParameters* param_gaussian_kernel= new CModelSelectionParameters("kernel", gaussian_kernel); CModelSelectionParameters* gaussian_kernel_width= new CModelSelectionParameters("width"); gaussian_kernel_width->build_values(5.0, 8.0, R_EXP, 1.0, 2.0); param_gaussian_kernel->append_child(gaussian_kernel_width); root->append_child(param_gaussian_kernel); CPolyKernel* poly_kernel=new CPolyKernel(); /* print all parameters available for model selection * Don't worry if yours is not included, simply write to the mailing list */ poly_kernel->print_modsel_params(); CModelSelectionParameters* param_poly_kernel= new CModelSelectionParameters("kernel", poly_kernel); root->append_child(param_poly_kernel); CModelSelectionParameters* param_poly_kernel_degree= new CModelSelectionParameters("degree"); param_poly_kernel_degree->build_values(2, 3, R_LINEAR); param_poly_kernel->append_child(param_poly_kernel_degree); return root; } void test_cross_validation() { /* data matrix dimensions */ index_t num_vectors=30; index_t num_features=1; /* training label data */ SGVector<float64_t> lab(num_vectors); /* fill data matrix and labels */ SGMatrix<float64_t> train_dat(num_features, num_vectors); CMath::range_fill_vector(train_dat.matrix, num_vectors); for (index_t i=0; i<num_vectors; ++i) { /* labels are linear plus noise */ lab.vector[i]=i+CMath::normal_random(0, 1.0); } /* training features */ CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(train_dat); SG_REF(features); /* training labels; regression task, so CRegressionLabels */ CRegressionLabels* labels=new CRegressionLabels(lab); /* kernel ridge regression, only set labels for now, rest does not matter */ CKernelRidgeRegression* krr=new CKernelRidgeRegression(0, NULL, labels); /* evaluation criterion */ CMeanSquaredError* eval_crit= new CMeanSquaredError(); /* splitting strategy */ index_t n_folds=5; CCrossValidationSplitting* splitting= new CCrossValidationSplitting(labels, n_folds); /* cross validation instance; the number of runs is set below */ CCrossValidation* cross=new CCrossValidation(krr, features,
labels, splitting, eval_crit); cross->set_num_runs(3); // cross->set_conf_int_alpha(0.05); /* print all parameters available for model selection * Don't worry if yours is not included, simply write to the mailing list */ krr->print_modsel_params(); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=create_param_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); /* print current combination */ bool print_state=true; CParameterCombination* best_combination=grid_search->select_model( print_state); SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(krr); /* larger number of runs to have tighter confidence intervals */ cross->set_num_runs(10); // cross->set_conf_int_alpha(0.01); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("result: "); result->print_result(); /* clean up */ SG_UNREF(features); SG_UNREF(best_combination); SG_UNREF(result); SG_UNREF(grid_search); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_cross_validation(); exit_shogun(); return 0; }
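The build_values(min, max, type) calls determine the grid: R_LINEAR fills the node with linearly spaced values, while R_EXP interprets the bounds as exponents, with step and base as the optional fourth and fifth arguments (as in the build_values(5.0, 8.0, R_EXP, 1.0, 2.0) call above). A self-contained sketch to print what a node actually contains:

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	/* R_EXP with default step/base should yield 2^-1, 2^0, 2^1;
	 * R_LINEAR would yield -1, 0, 1 instead */
	CModelSelectionParameters* tau=new CModelSelectionParameters("tau");
	SG_REF(tau);
	tau->build_values(-1.0, 1.0, R_EXP);
	tau->print_tree();
	SG_UNREF(tau);

	exit_shogun();
	return 0;
}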
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/lib/config.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/svm/LibLinear.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1=new CModelSelectionParameters("C1"); root->append_child(c1); c1->build_values(-2.0, 2.0, R_EXP); CModelSelectionParameters* c2=new CModelSelectionParameters("C2"); root->append_child(c2); c2->build_values(-2.0, 2.0, R_EXP); return root; } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); #ifdef HAVE_LAPACK int32_t num_subsets=5; int32_t num_vectors=11; /* create some data */ SGMatrix<float64_t> matrix(2, num_vectors); for (int32_t i=0; i<num_vectors*2; i++) matrix.matrix[i]=i; /* create num_feautres 2-dimensional vectors */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix); /* create three labels */ CBinaryLabels* labels=new CBinaryLabels(num_vectors); for (index_t i=0; i<num_vectors; ++i) labels->set_label(i, i%2==0 ? 
1 : -1); /* create linear classifier (use -s 2 option to avoid warnings) */ CLibLinear* classifier=new CLibLinear(L2R_L2LOSS_SVC); /* splitting strategy */ CStratifiedCrossValidationSplitting* splitting_strategy= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* accuracy evaluation */ CContingencyTableEvaluation* evaluation_criterium= new CContingencyTableEvaluation(ACCURACY); /* cross validation class for evaluation in model selection */ CCrossValidation* cross=new CCrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterium); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ classifier->print_modsel_params(); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=create_param_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); /* set autolocking to false to get rid of warnings */ cross->set_autolock(false); CParameterCombination* best_combination=grid_search->select_model(); SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(classifier); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(best_combination); SG_UNREF(grid_search); #endif // HAVE_LAPACK exit_shogun(); return 0; }
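Besides print_result(), the numeric outcome can be read directly off the result object; the MKL example further down references its mean member in a commented-out line. A sketch using the identifiers from the example above:

/* sketch: access the raw cross-validation statistics directly */
CCrossValidationResult* raw=(CCrossValidationResult*)cross->evaluate();
SG_SPRINT("mean accuracy: %f\n", raw->mean);
SG_UNREF(raw);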
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012-2014 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/mkl/MKLClassification.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CombinedKernel.h> using namespace shogun; CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1=new CModelSelectionParameters("C1"); root->append_child(c1); c1->build_values(-1.0, 1.0, R_EXP); CModelSelectionParameters* c2=new CModelSelectionParameters("C2"); root->append_child(c2); c2->build_values(-1.0, 1.0, R_EXP); CCombinedKernel* kernel1=new CCombinedKernel(); kernel1->append_kernel(new CGaussianKernel(10, 2)); kernel1->append_kernel(new CGaussianKernel(10, 3)); kernel1->append_kernel(new CGaussianKernel(10, 4)); CModelSelectionParameters* param_kernel1= new CModelSelectionParameters("kernel", kernel1); root->append_child(param_kernel1); CCombinedKernel* kernel2=new CCombinedKernel(); kernel2->append_kernel(new CGaussianKernel(10, 20)); kernel2->append_kernel(new CGaussianKernel(10, 30)); kernel2->append_kernel(new CGaussianKernel(10, 40)); CModelSelectionParameters* param_kernel2= new CModelSelectionParameters("kernel", kernel2); root->append_child(param_kernel2); return root; } void test() { int32_t num_subsets=3; int32_t num_vectors=20; int32_t dim_vectors=3; /* create some data and labels */ SGMatrix<float64_t> matrix(dim_vectors, num_vectors); for (int32_t i=0; i<num_vectors*dim_vectors; i++) matrix.matrix[i]=CMath::randn_double(); /* create feature object */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(matrix); /* create combined features */ CCombinedFeatures* comb_features=new CCombinedFeatures(); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); SG_REF(comb_features); /* create labels, two classes */ CBinaryLabels* labels=new CBinaryLabels(num_vectors); SG_REF(labels); for (index_t i=0; i<num_vectors; ++i) labels->set_label(i, i%2==0 ? 
+1 : -1); /* works */ // /* create svm */ // CMKLClassification* classifier=new CMKLClassification(new CLibSVM()); // classifier->set_interleaved_optimization_enabled(false); /* create svm */ CMKLClassification* classifier=new CMKLClassification(); // both fail: //classifier->set_interleaved_optimization_enabled(false); classifier->set_interleaved_optimization_enabled(true); /* splitting strategy */ CStratifiedCrossValidationSplitting* splitting_strategy= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* accuracy evaluation */ CContingencyTableEvaluation* evaluation_criterion= new CContingencyTableEvaluation(ACCURACY); /* cross validation class for evaluation in model selection */ CCrossValidation* cross=new CCrossValidation(classifier, comb_features, labels, splitting_strategy, evaluation_criterion); cross->set_num_runs(1); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ classifier->print_modsel_params(); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=create_param_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); // This unfortunately currently creates a NULL pointer read SEGFAULT :( // reported on github: MKL Multiclass null pointer read //bool print_state=true; /*CParameterCombination* best_combination=grid_search->select_model( print_state); SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(classifier);*/ /* larger number of runs to have tighter confidence intervals */ /*cross->set_num_runs(10); cross->set_conf_int_alpha(0.01); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); SG_SPRINT("result: %f", result->mean);*/ /* clean up */ SG_UNREF(comb_features); SG_UNREF(labels); //SG_UNREF(best_combination); SG_UNREF(grid_search); } int main(int argc, char **argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_INFO); test(); exit_shogun(); return 0; }
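As the comments in this example note, the default-constructed CMKLClassification currently segfaults during model selection. The variant marked /* works */ above wraps an explicit LibSVM solver and disables interleaved optimization; as a sketch:

/* sketch of the working variant from the comments above: explicit
 * LibSVM solver, interleaved optimization off */
CMKLClassification* mkl=new CMKLClassification(new CLibSVM());
mkl->set_interleaved_optimization_enabled(false);
/* ... then hand mkl to CCrossValidation exactly as done above ... */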
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/modelselection/ModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/multiclass/MulticlassLibSVM.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/mathematics/Math.h> using namespace shogun; CModelSelectionParameters* build_param_tree(CKernel* kernel) { CModelSelectionParameters * root=new CModelSelectionParameters(); CModelSelectionParameters * c=new CModelSelectionParameters("C"); root->append_child(c); c->build_values(-1.0, 1.0, R_EXP); CModelSelectionParameters * params_kernel=new CModelSelectionParameters("kernel", kernel); root->append_child(params_kernel); CModelSelectionParameters * params_kernel_width=new CModelSelectionParameters("log_width"); params_kernel_width->build_values(-CMath::log(2.0), 0.0, R_LINEAR); params_kernel->append_child(params_kernel_width); return root; } void test() { /* number of classes is dimension of data here to have some easy multiclass * structure */ const unsigned int num_vectors=50; const unsigned int dim_vectors=3; // Heiko: increase number of classes and things will fail :( // Sergey: the special buggy case of 3 classes was hopefully fixed float64_t distance=5; /* create data: some easy multiclass data */ SGMatrix<float64_t> feat=SGMatrix<float64_t>(dim_vectors, num_vectors); SGVector<float64_t> lab(num_vectors); for (index_t j=0; j<feat.num_cols; ++j) { lab[j]=j%dim_vectors; for (index_t i=0; i<feat.num_rows; ++i) feat(i, j)=CMath::randn_double(); /* make sure classes are (almost) linearly separable against each other */ feat(lab[j],j)+=distance; } /* shogun representation of above data */ CDenseFeatures<float64_t> * cfeatures=new CDenseFeatures<float64_t>(feat); CMulticlassLabels * clabels=new CMulticlassLabels(lab); float64_t sigma=2; CGaussianKernel* kernel=new CGaussianKernel(10, sigma); const float C=10.; CMulticlassLibSVM* cmachine=new CMulticlassLibSVM(C, kernel, clabels); CMulticlassAccuracy * eval_crit=new CMulticlassAccuracy(); /* k-fold stratified x-validation */ index_t k=3; CStratifiedCrossValidationSplitting * splitting= new CStratifiedCrossValidationSplitting(clabels, k); CCrossValidation * cross=new CCrossValidation(cmachine, cfeatures, clabels, splitting, eval_crit); cross->set_num_runs(10); // cross->set_conf_int_alpha(0.05); /* create parameters for model selection */ CModelSelectionParameters* root=build_param_tree(kernel); CGridSearchModelSelection * model_selection=new CGridSearchModelSelection( cross, root); bool print_state=true; CParameterCombination * params=model_selection->select_model(print_state); SG_SPRINT("best combination\n"); params->print_tree(); /* clean up memory */ SG_UNREF(model_selection); SG_UNREF(params); } int main(int argc, char **argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); test(); exit_shogun(); return 0; }
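For a quick sanity check outside of cross-validation, the machine can also be trained on all data and scored with the same CMulticlassAccuracy instance. A sketch (identifiers from the example above, not part of the original):

/* sketch: training accuracy as a sanity check */
cmachine->train(cfeatures);
CMulticlassLabels* pred=cmachine->apply_multiclass(cfeatures);
float64_t acc=eval_crit->evaluate(pred, clabels);
SG_SPRINT("training accuracy: %f\n", acc);
SG_UNREF(pred);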
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidation.h> #include <shogun/evaluation/ContingencyTableEvaluation.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/modelselection/GridSearchModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/features/StringFeatures.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/lib/SGStringList.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* create_param_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1=new CModelSelectionParameters("C1"); root->append_child(c1); c1->build_values(1.0, 2.0, R_EXP); CModelSelectionParameters* c2=new CModelSelectionParameters("C2"); root->append_child(c2); c2->build_values(1.0, 2.0, R_EXP); CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ ds_kernel->print_modsel_params(); CModelSelectionParameters* param_ds_kernel= new CModelSelectionParameters("kernel", ds_kernel); root->append_child(param_ds_kernel); CModelSelectionParameters* ds_kernel_delta= new CModelSelectionParameters("delta"); ds_kernel_delta->build_values(1, 2, R_LINEAR); param_ds_kernel->append_child(ds_kernel_delta); CModelSelectionParameters* ds_kernel_theta= new CModelSelectionParameters("theta"); ds_kernel_theta->build_values(1, 2, R_LINEAR); param_ds_kernel->append_child(ds_kernel_theta); return root; } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); index_t num_strings=10; index_t max_string_length=20; index_t min_string_length=max_string_length/2; index_t num_subsets=num_strings/3; SGStringList<char> strings(num_strings, max_string_length); for (index_t i=0; i<num_strings; ++i) { index_t len=CMath::random(min_string_length, max_string_length); SGString<char> current(len); SG_SPRINT("string %i: \"", i); /* fill with random uppercase letters (ASCII) */ for (index_t j=0; j<len; ++j) { current.string[j]=(char)CMath::random('A', 'Z'); char* string=new char[2]; string[0]=current.string[j]; string[1]='\0'; SG_SPRINT("%s", string); delete[] string; } SG_SPRINT("\"\n"); strings.strings[i]=current; } /* create num_feautres 2-dimensional vectors */ CStringFeatures<char>* features=new CStringFeatures<char>(strings, ALPHANUM); /* create labels, two classes */ CBinaryLabels* labels=new CBinaryLabels(num_strings); for (index_t i=0; i<num_strings; ++i) labels->set_label(i, i%2==0 ? 
1 : -1); /* create svm classifier */ CLibSVM* classifier=new CLibSVM(); /* splitting strategy */ CStratifiedCrossValidationSplitting* splitting_strategy= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* accuracy evaluation */ CContingencyTableEvaluation* evaluation_criterium= new CContingencyTableEvaluation(ACCURACY); /* cross validation class for evaluation in model selection */ CCrossValidation* cross=new CCrossValidation(classifier, features, labels, splitting_strategy, evaluation_criterium); cross->set_num_runs(2); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ classifier->print_modsel_params(); /* model parameter selection, deletion is handled by modsel class (SG_UNREF) */ CModelSelectionParameters* param_tree=create_param_tree(); param_tree->print_tree(); /* handles all of the above structures in memory */ CGridSearchModelSelection* grid_search=new CGridSearchModelSelection( cross, param_tree); bool print_state=true; CParameterCombination* best_combination=grid_search->select_model( print_state); SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(classifier); /* larger number of runs to have tighter confidence intervals */ cross->set_num_runs(10); // cross->set_conf_int_alpha(0.01); classifier->data_lock(labels, features); CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate(); if (result->get_result_type() != CROSSVALIDATION_RESULT) SG_SERROR("Evaluation result is not of type CCrossValidationResult!"); SG_SPRINT("result: "); result->print_result(); /* clean up */ SG_UNREF(result); SG_UNREF(best_combination); SG_UNREF(grid_search); exit_shogun(); return 0; }
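Since the strings above are drawn with CMath::random, every run of this example sees different data. Seeding the generator before the generation loop (as the neural network example at the bottom of this page does) makes runs reproducible; a one-line sketch:

/* sketch: fix the seed so repeated runs build identical random strings;
 * 17 is an arbitrary choice */
CMath::init_random(17);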
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PowerKernel.h> #include <shogun/distance/MinkowskiMetric.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/mathematics/Math.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } CModelSelectionParameters* build_complex_example_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c=new CModelSelectionParameters("C"); root->append_child(c); c->build_values(1.0, 1.0, R_EXP); CPowerKernel* power_kernel=new CPowerKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ power_kernel->print_modsel_params(); CModelSelectionParameters* param_power_kernel= new CModelSelectionParameters("kernel", power_kernel); root->append_child(param_power_kernel); CModelSelectionParameters* param_power_kernel_degree= new CModelSelectionParameters("degree"); param_power_kernel_degree->build_values(1.0, 1.0, R_EXP); param_power_kernel->append_child(param_power_kernel_degree); CMinkowskiMetric* m_metric=new CMinkowskiMetric(10); CModelSelectionParameters* param_power_kernel_metric1= new CModelSelectionParameters("distance", m_metric); param_power_kernel->append_child(param_power_kernel_metric1); CModelSelectionParameters* param_power_kernel_metric1_k= new CModelSelectionParameters("k"); param_power_kernel_metric1_k->build_values(1.0, 12.0, R_LINEAR); param_power_kernel_metric1->append_child(param_power_kernel_metric1_k); CGaussianKernel* gaussian_kernel=new CGaussianKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ gaussian_kernel->print_modsel_params(); CModelSelectionParameters* param_gaussian_kernel= new CModelSelectionParameters("kernel", gaussian_kernel); root->append_child(param_gaussian_kernel); CModelSelectionParameters* param_gaussian_kernel_width= new CModelSelectionParameters("log_width"); param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR); param_gaussian_kernel->append_child(param_gaussian_kernel_width); CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ ds_kernel->print_modsel_params(); CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters("kernel", ds_kernel); root->append_child(param_ds_kernel); CModelSelectionParameters* param_ds_kernel_delta= new CModelSelectionParameters("delta"); param_ds_kernel_delta->build_values(1.0, 2.0, R_EXP); param_ds_kernel->append_child(param_ds_kernel_delta); CModelSelectionParameters* param_ds_kernel_theta= new CModelSelectionParameters("theta"); param_ds_kernel_theta->build_values(1.0, 2.0, R_EXP); param_ds_kernel->append_child(param_ds_kernel_theta); return root; } CModelSelectionParameters* build_sgobject_no_childs_tree() 
{ CPowerKernel* power_kernel=new CPowerKernel(); CModelSelectionParameters* param_power_kernel= new CModelSelectionParameters("kernel", power_kernel); return param_power_kernel; } CModelSelectionParameters* build_leaf_node_tree() { CModelSelectionParameters* c_1=new CModelSelectionParameters("C1"); c_1->build_values(1.0, 1.0, R_EXP); return c_1; } CModelSelectionParameters* build_root_no_childs_tree() { return new CModelSelectionParameters(); } CModelSelectionParameters* build_root_value_childs_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c_1=new CModelSelectionParameters("C1"); root->append_child(c_1); c_1->build_values(1.0, 1.0, R_EXP); CModelSelectionParameters* c_2=new CModelSelectionParameters("C2"); root->append_child(c_2); c_2->build_values(1.0, 1.0, R_EXP); return root; } CModelSelectionParameters* build_root_sg_object_child_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CPowerKernel* power_kernel=new CPowerKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ power_kernel->print_modsel_params(); CModelSelectionParameters* param_power_kernel= new CModelSelectionParameters("kernel", power_kernel); root->append_child(param_power_kernel); return root; } CModelSelectionParameters* build_root_sg_object_child_value_child_tree() { CModelSelectionParameters* root=new CModelSelectionParameters(); CPowerKernel* power_kernel=new CPowerKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ power_kernel->print_modsel_params(); CModelSelectionParameters* param_power_kernel= new CModelSelectionParameters("kernel", power_kernel); CModelSelectionParameters* c=new CModelSelectionParameters("C"); root->append_child(c); c->build_values(1.0, 1.0, R_EXP); root->append_child(param_power_kernel); return root; } void test_get_combinations(CModelSelectionParameters* tree) { tree->print_tree(); /* build combinations of parameter trees */ CDynamicObjectArray* combinations=tree->get_combinations(); /* print and directly delete them all */ SG_SPRINT("----------------------------------\n"); for (index_t i=0; i<combinations->get_num_elements(); ++i) { CParameterCombination* combination=(CParameterCombination*) combinations->get_element(i); combination->print_tree(); SG_UNREF(combination); } SG_UNREF(combinations); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); CModelSelectionParameters* tree; tree=build_root_no_childs_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_leaf_node_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_sgobject_no_childs_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_root_value_childs_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_root_sg_object_child_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_root_sg_object_child_value_child_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); tree=build_complex_example_tree(); SG_REF(tree); test_get_combinations(tree); SG_UNREF(tree); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/lib/DynamicObjectArray.h> #include <shogun/lib/SGVector.h> #include <stdlib.h> using namespace std; using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_parameter_set_multiplication() { SG_SPRINT("\ntest_parameter_set_multiplication()\n"); DynArray<Parameter*> set1; DynArray<Parameter*> set2; SGVector<float64_t> param_vector(8); SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen); Parameter parameters[4]; parameters[0].add(&param_vector.vector[0], "0"); parameters[0].add(&param_vector.vector[1], "1"); set1.append_element(&parameters[0]); parameters[1].add(&param_vector.vector[2], "2"); parameters[1].add(&param_vector.vector[3], "3"); set1.append_element(&parameters[1]); parameters[2].add(&param_vector.vector[4], "4"); parameters[2].add(&param_vector.vector[5], "5"); set2.append_element(&parameters[2]); parameters[3].add(&param_vector.vector[6], "6"); parameters[3].add(&param_vector.vector[7], "7"); set2.append_element(&parameters[3]); DynArray<Parameter*>* result=new DynArray<Parameter*>();//CParameterCombination::parameter_set_multiplication(set1, set2); for (index_t i=0; i<result->get_num_elements(); ++i) { Parameter* p=result->get_element(i); for (index_t j=0; j<p->get_num_parameters(); ++j) SG_SPRINT("%s ", p->get_parameter(j)->m_name); SG_SPRINT("\n"); delete p; } delete result; } void test_leaf_sets_multiplication() { SG_SPRINT("\ntest_leaf_sets_multiplication()\n"); SGVector<float64_t> param_vector(6); SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen); CDynamicObjectArray sets; CParameterCombination* new_root=new CParameterCombination(); SG_REF(new_root); CDynamicObjectArray* current=new CDynamicObjectArray(); sets.append_element(current); Parameter* p=new Parameter(); p->add(&param_vector.vector[0], "0"); CParameterCombination* pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(&param_vector.vector[1], "1"); pc=new CParameterCombination(p); current->append_element(pc); /* first case: one element */ CDynamicObjectArray* result_simple= CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("one set\n"); for (index_t i=0; i<result_simple->get_num_elements(); ++i) { CParameterCombination* tpc=(CParameterCombination*) result_simple->get_element(i); tpc->print_tree(); SG_UNREF(tpc); } SG_UNREF(result_simple); /* now more elements are created */ current=new CDynamicObjectArray(); sets.append_element(current); p=new Parameter(); p->add(&param_vector.vector[2], "2"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(&param_vector.vector[3], "3"); pc=new CParameterCombination(p); current->append_element(pc); current=new CDynamicObjectArray(); sets.append_element(current); p=new Parameter(); p->add(&param_vector.vector[4], "4"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(&param_vector.vector[5], "5"); pc=new CParameterCombination(p); current->append_element(pc); /* second case: more elements */ CDynamicObjectArray* result_complex=
CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("more sets\n"); for (index_t i=0; i<result_complex->get_num_elements(); ++i) { CParameterCombination* tpc=(CParameterCombination*) result_complex->get_element(i); tpc->print_tree(); SG_UNREF(tpc); } SG_UNREF(result_complex); SG_UNREF(new_root); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_parameter_set_multiplication(); test_leaf_sets_multiplication(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PowerKernel.h> #include <shogun/kernel/LinearKernel.h> #include <shogun/distance/MinkowskiMetric.h> #include <shogun/distance/EuclideanDistance.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/machine/gp/ExactInferenceMethod.h> #include <shogun/machine/gp/GaussianLikelihood.h> #include <shogun/machine/gp/ZeroMean.h> #include <shogun/regression/GaussianProcessRegression.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> using namespace shogun; void test_tree(CModelSelectionParameters* tree) { SG_SPRINT("\n\ntree to process:\n"); tree->print_tree(); /* build combinations of parameter trees */ CDynamicObjectArray* combinations=tree->get_combinations(); /* print and directly delete them all */ SG_SPRINT("----------------------------------\n"); for (index_t i=0; i<combinations->get_num_elements(); ++i) { CParameterCombination* combination= (CParameterCombination*)combinations->get_element(i); combination->print_tree(); SG_UNREF(combination); } SG_UNREF(combinations); } CModelSelectionParameters* create_param_tree_1() { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c=new CModelSelectionParameters("C"); root->append_child(c); c->build_values(1, 2, R_EXP); CPowerKernel* power_kernel=new CPowerKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ power_kernel->print_modsel_params(); CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters( "kernel", power_kernel); root->append_child(param_power_kernel); CModelSelectionParameters* param_power_kernel_degree= new CModelSelectionParameters("degree"); param_power_kernel_degree->build_values(1, 2, R_EXP); param_power_kernel->append_child(param_power_kernel_degree); CMinkowskiMetric* m_metric=new CMinkowskiMetric(10); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ m_metric->print_modsel_params(); CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param= new CModelSelectionParameters("distance", m_metric); param_power_kernel->append_child( param_power_kernel_metrikernel_width_sigma_param); CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param_k= new CModelSelectionParameters("k"); param_power_kernel_metrikernel_width_sigma_param_k->build_values(1, 2, R_LINEAR); param_power_kernel_metrikernel_width_sigma_param->append_child( param_power_kernel_metrikernel_width_sigma_param_k); CGaussianKernel* gaussian_kernel=new CGaussianKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ gaussian_kernel->print_modsel_params(); CModelSelectionParameters* param_gaussian_kernel= new CModelSelectionParameters("kernel", 
gaussian_kernel); root->append_child(param_gaussian_kernel); CModelSelectionParameters* param_gaussian_kernel_width= new CModelSelectionParameters("log_width"); param_gaussian_kernel_width->build_values(0.0, 0.5*CMath::log(2), R_LINEAR); param_gaussian_kernel->append_child(param_gaussian_kernel_width); CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel(); /* print all parameter available for modelselection * Dont worry if yours is not included, simply write to the mailing list */ ds_kernel->print_modsel_params(); CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters( "kernel", ds_kernel); root->append_child(param_ds_kernel); CModelSelectionParameters* param_ds_kernel_delta= new CModelSelectionParameters("delta"); param_ds_kernel_delta->build_values(1, 2, R_EXP); param_ds_kernel->append_child(param_ds_kernel_delta); CModelSelectionParameters* param_ds_kernel_theta= new CModelSelectionParameters("theta"); param_ds_kernel_theta->build_values(1, 2, R_EXP); param_ds_kernel->append_child(param_ds_kernel_theta); return root; } CModelSelectionParameters* create_param_tree_2() { CModelSelectionParameters* root=new CModelSelectionParameters(); CPowerKernel* power_kernel=new CPowerKernel(); CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters( "kernel", power_kernel); root->append_child(param_power_kernel); CMinkowskiMetric* metric=new CMinkowskiMetric(); CModelSelectionParameters* param_power_kernel_metric= new CModelSelectionParameters("distance", metric); param_power_kernel->append_child(param_power_kernel_metric); CModelSelectionParameters* param_metric_k=new CModelSelectionParameters( "k"); param_metric_k->build_values(2, 3, R_LINEAR); param_power_kernel_metric->append_child(param_metric_k); CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel(); CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters( "kernel", ds_kernel); root->append_child(param_ds_kernel); return root; } CModelSelectionParameters* create_param_tree_3() { CModelSelectionParameters* root=new CModelSelectionParameters(); CPowerKernel* power_kernel=new CPowerKernel(); CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters( "kernel", power_kernel); root->append_child(param_power_kernel); CMinkowskiMetric* metric=new CMinkowskiMetric(); CModelSelectionParameters* param_power_kernel_metric= new CModelSelectionParameters("distance", metric); param_power_kernel->append_child(param_power_kernel_metric); CEuclideanDistance* euclidean=new CEuclideanDistance(); CModelSelectionParameters* param_power_kernel_distance= new CModelSelectionParameters("distance", euclidean); param_power_kernel->append_child(param_power_kernel_distance); CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel(); CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters( "kernel", ds_kernel); root->append_child(param_ds_kernel); return root; } CModelSelectionParameters* create_param_tree_4a() { CModelSelectionParameters* root=new CModelSelectionParameters(); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); CRegressionLabels* labels=new CRegressionLabels(); CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2); CPowerKernel* power_kernel=new CPowerKernel(); CZeroMean* mean=new CZeroMean(); CGaussianLikelihood* lik=new CGaussianLikelihood(); CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features, mean, labels, lik); CLibSVM* svm=new CLibSVM(); CPowerKernel* power_kernel_svm=new CPowerKernel(); 
CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2); CModelSelectionParameters* param_inf=new CModelSelectionParameters( "inference_method", inf); root->append_child(param_inf); CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters( "likelihood_model", lik); param_inf->append_child(param_inf_gaussian); CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters( "kernel", gaussian_kernel); param_inf->append_child(param_inf_kernel_1); CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters( "kernel", power_kernel); param_inf->append_child(param_inf_kernel_2); CModelSelectionParameters* param_svm=new CModelSelectionParameters( "SVM", svm); root->append_child(param_svm); CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters( "kernel", power_kernel_svm); param_svm->append_child(param_svm_kernel_1); CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters( "kernel", gaussian_kernel_svm); param_svm->append_child(param_svm_kernel_2); return root; } CModelSelectionParameters* create_param_tree_4b() { CModelSelectionParameters* root=new CModelSelectionParameters(); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); CRegressionLabels* labels=new CRegressionLabels(); CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2); CPowerKernel* power_kernel=new CPowerKernel(); CZeroMean* mean=new CZeroMean(); CGaussianLikelihood* lik=new CGaussianLikelihood(); CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features, mean, labels, lik); CLibSVM* svm=new CLibSVM(); CPowerKernel* power_kernel_svm=new CPowerKernel(); CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2); CModelSelectionParameters* param_c=new CModelSelectionParameters("C1"); root->append_child(param_c); param_c->build_values(1,2,R_EXP); CModelSelectionParameters* param_inf=new CModelSelectionParameters( "inference_method", inf); root->append_child(param_inf); CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters( "likelihood_model", lik); param_inf->append_child(param_inf_gaussian); CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters( "kernel", gaussian_kernel); param_inf->append_child(param_inf_kernel_1); CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters( "kernel", power_kernel); param_inf->append_child(param_inf_kernel_2); CModelSelectionParameters* param_svm=new CModelSelectionParameters( "SVM", svm); root->append_child(param_svm); CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters( "kernel", power_kernel_svm); param_svm->append_child(param_svm_kernel_1); CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters( "kernel", gaussian_kernel_svm); param_svm->append_child(param_svm_kernel_2); return root; } CModelSelectionParameters* create_param_tree_5() { CModelSelectionParameters* root=new CModelSelectionParameters(); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); CRegressionLabels* labels=new CRegressionLabels(); CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2); CLinearKernel* linear_kernel=new CLinearKernel(); CPowerKernel* power_kernel=new CPowerKernel(); CZeroMean* mean=new CZeroMean(); CGaussianLikelihood* lik=new CGaussianLikelihood(); CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel, features, mean, labels, lik); CModelSelectionParameters* param_inf=new CModelSelectionParameters( "inference_method", inf); 
root->append_child(param_inf); CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters( "likelihood_model", lik); param_inf->append_child(param_inf_gaussian); CModelSelectionParameters* param_inf_gaussian_sigma= new CModelSelectionParameters("log_sigma"); param_inf_gaussian->append_child(param_inf_gaussian_sigma); param_inf_gaussian_sigma->build_values(2.0*CMath::log(2.0), 3.0*CMath::log(2.0), R_LINEAR); CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters( "kernel", gaussian_kernel); param_inf->append_child(param_inf_kernel_1); CModelSelectionParameters* param_inf_kernel_width= new CModelSelectionParameters("log_width"); param_inf_kernel_1->append_child(param_inf_kernel_width); param_inf_kernel_width->build_values(0.0, 0.5*CMath::log(2.0), R_LINEAR); CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters( "kernel", linear_kernel); param_inf->append_child(param_inf_kernel_2); CModelSelectionParameters* param_inf_kernel_3=new CModelSelectionParameters( "kernel", power_kernel); param_inf->append_child(param_inf_kernel_3); return root; } int main(int argc, char **argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); CModelSelectionParameters* tree=NULL; tree=create_param_tree_1(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_2(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_3(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_4a(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_4b(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_5(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); exit_shogun(); return 0; }
/* * Copyright (c) 2014, Shogun Toolbox Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Written (W) 2014 Khaled Nasr */ #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/features/DataGenerator.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/neuralnets/NeuralNetwork.h> #include <shogun/neuralnets/NeuralLayers.h> using namespace shogun; int main(int, char*[]) { init_shogun_with_defaults(); #ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian() // initialize the random number generator with a fixed seed, for repeatability CMath::init_random(10); // Prepare the training data const int num_classes = 4; const int num_features = 10; const int num_examples_per_class = 20; SGMatrix<float64_t> X; SGVector<float64_t> Y; try { X = CDataGenerator::generate_gaussians( num_examples_per_class,num_classes,num_features); Y = SGVector<float64_t>(num_classes*num_examples_per_class); } catch (ShogunException e) { // out of memory SG_SPRINT(e.get_exception_string()); return 0; } for (int32_t i = 0; i < num_classes; i++) for (int32_t j = 0; j < num_examples_per_class; j++) Y[i*num_examples_per_class + j] = i; CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X); CMulticlassLabels* labels = new CMulticlassLabels(Y); // Create a small network with a single hidden layer CNeuralLayers* layers = new CNeuralLayers(); layers->input(num_features)->rectified_linear(10)->softmax(num_classes); CNeuralNetwork* network = new CNeuralNetwork(layers->done()); // initialize the network network->quick_connect(); network->initialize_neural_network(); // uncomment this line to enable info logging // network->io->set_loglevel(MSG_INFO); // train using default parameters network->set_labels(labels); network->train(features); // evaluate CMulticlassLabels* predictions = network->apply_multiclass(features); CMulticlassAccuracy* evaluator = new CMulticlassAccuracy(); float64_t accuracy = evaluator->evaluate(predictions, labels);
SG_SINFO("Accuracy = %f %\n", accuracy*100); // Clean up SG_UNREF(network); SG_UNREF(layers); SG_UNREF(features); SG_UNREF(predictions); SG_UNREF(evaluator); #endif exit_shogun(); return 0; }
/* * Copyright (c) 2014, Shogun Toolbox Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Written (W) 2014 Khaled Nasr */ #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/features/DataGenerator.h> #include <shogun/features/DenseFeatures.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/lib/DynamicObjectArray.h> #include <shogun/neuralnets/NeuralNetwork.h> #include <shogun/neuralnets/NeuralInputLayer.h> #include <shogun/neuralnets/NeuralSoftmaxLayer.h> #include <shogun/neuralnets/NeuralConvolutionalLayer.h> using namespace shogun; int main(int, char*[]) { init_shogun_with_defaults(); #ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian() // initialize the random number generator with a fixed seed, for repeatability CMath::init_random(10); // Prepare the training data const int width = 4; const int height = 4; const int num_channels = 2; const int num_features = width*height*num_channels; const int num_classes = 3; const int num_examples_per_class = 15; SGMatrix<float64_t> X; SGVector<float64_t> Y; try { X = CDataGenerator::generate_gaussians( num_examples_per_class,num_classes,num_features); Y = SGVector<float64_t>(num_classes*num_examples_per_class); } catch (ShogunException e) { // out of memory SG_SPRINT(e.get_exception_string()); return 0; } for (int32_t i = 0; i < num_classes; i++) for (int32_t j = 0; j < num_examples_per_class; j++) Y[i*num_examples_per_class + j] = i; CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X); CMulticlassLabels* labels = new CMulticlassLabels(Y); // prepare the layers CDynamicObjectArray* layers = new CDynamicObjectArray(); // input layer layers->append_element(new CNeuralInputLayer(width,height,num_channels)); // first convolutional layer: 3 feature maps, 3x3 masks, 2x2 max-pooling layers->append_element(new CNeuralConvolutionalLayer( CMAF_RECTIFIED_LINEAR, 3, 1,1, 2,2)); // second convolutional layer: 5 feature maps, 3x3 masks layers->append_element(new CNeuralConvolutionalLayer( 
CMAF_RECTIFIED_LINEAR, 5, 1,1)); // output layer layers->append_element(new CNeuralSoftmaxLayer(num_classes)); // create and initialize the network CNeuralNetwork* network = new CNeuralNetwork(layers); network->quick_connect(); network->initialize_neural_network(0.1); // uncomment this line to enable info logging // network->io->set_loglevel(MSG_INFO); // train using default parameters network->set_labels(labels); network->train(features); // evaluate CMulticlassLabels* predictions = network->apply_multiclass(features); CMulticlassAccuracy* evaluator = new CMulticlassAccuracy(); float64_t accuracy = evaluator->evaluate(predictions, labels); SG_SINFO("Accuracy = %f%%\n", accuracy*100); // Clean up SG_UNREF(network); SG_UNREF(features); SG_UNREF(predictions); SG_UNREF(evaluator); #endif exit_shogun(); return 0; }
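Reading the two CNeuralConvolutionalLayer constructor calls above against their comments, the arguments appear to be: the activation function (CMAF_RECTIFIED_LINEAR), the number of feature maps, the mask radius in x and y (radius 1 giving the 3x3 masks mentioned in the comments), and optionally the max-pooling factors in x and y (2,2 for the 2x2 pooling in the first layer). Treat this reading as an inference from the example rather than authoritative API documentation.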
/* * Copyright (c) 2014, Shogun Toolbox Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Written (W) 2014 Khaled Nasr */ #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/features/DataGenerator.h> #include <shogun/features/DenseFeatures.h> #include <shogun/neuralnets/DeepAutoencoder.h> #include <shogun/neuralnets/NeuralLayers.h> using namespace shogun; int main(int, char*[]) { init_shogun_with_defaults(); #ifdef HAVE_LAPACK // for CDataGenerator::generate_gaussian() // initialize the random number generator with a fixed seed, for repeatability CMath::init_random(10); // Prepare the training data const int num_features = 20; const int num_classes = 4; const int num_examples_per_class = 20; SGMatrix<float64_t> X; try { X = CDataGenerator::generate_gaussians( num_examples_per_class,num_classes,num_features); } catch (ShogunException e) { // out of memory SG_SPRINT(e.get_exception_string()); return 0; } CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X); // Create a deep autoencoder CNeuralLayers* layers = new CNeuralLayers(); layers ->input(num_features) ->rectified_linear(10)->rectified_linear(5)->rectified_linear(10) ->linear(num_features); CDeepAutoencoder* ae = new CDeepAutoencoder(layers->done()); // uncomment this line to enable info logging // ae->io->set_loglevel(MSG_INFO); // pre-train ae->pt_epsilon.set_const(1e-6); ae->pre_train(features); // fine-tune ae->train(features); // reconstruct the data CDenseFeatures<float64_t>* reconstructions = ae->reconstruct(features); SGMatrix<float64_t> X_reconstructed = reconstructions->get_feature_matrix(); // find the average relative difference between the data and the reconstructions float64_t avg_diff = 0; int32_t N = X.num_rows*X.num_cols; for (int32_t i=0; i<N; i++) avg_diff += CMath::abs(X[i]-X_reconstructed[i])/CMath::abs(X[i]); avg_diff /= N; SG_SINFO("Average relative difference = %f%%\n", avg_diff*100); // Clean up SG_UNREF(ae); SG_UNREF(layers); SG_UNREF(features); SG_UNREF(reconstructions); #endif exit_shogun(); return 0; }
/* * Copyright (c) 2014, Shogun Toolbox Foundation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. Neither the name of the copyright holder nor the names of its * contributors may be used to endorse or promote products derived from this * software without specific prior written permission. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * Written (W) 2014 Khaled Nasr */ #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/features/DenseFeatures.h> #include <shogun/mathematics/Statistics.h> #include <shogun/neuralnets/DeepBeliefNetwork.h> using namespace shogun; int main(int, char*[]) { init_shogun_with_defaults(); // initialize the random number generator with a fixed seed, for repeatability CMath::init_random(10); // Prepare the training data const int num_features = 5; const int num_examples= 50; SGVector<float64_t> means; SGMatrix<float64_t> X; try { means = SGVector<float64_t>(num_features); X = SGMatrix<float64_t>(num_features, num_examples); } catch (ShogunException e) { // out of memory SG_SPRINT(e.get_exception_string()); return 0; } for (int32_t i=0; i<num_features; i++) means[i] = CMath::random(-1.0,1.0); for (int32_t i=0; i<num_features; i++) for (int32_t j=0; j<num_examples; j++) X(i,j) = CMath::normal_random(means[i], 1.0); CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X); // Create a DBN CDeepBeliefNetwork* dbn = new CDeepBeliefNetwork(num_features, RBMVUT_GAUSSIAN); dbn->add_hidden_layer(10); dbn->add_hidden_layer(10); dbn->add_hidden_layer(20); dbn->initialize_neural_network(); // uncomment this line to enable info logging // dbn->io->set_loglevel(MSG_INFO); // pre-train dbn->pt_max_num_epochs.set_const(100); dbn->pt_cd_num_steps.set_const(10); dbn->pt_gd_learning_rate.set_const(0.01); dbn->pre_train(features); // fine-tune dbn->max_num_epochs = 100; dbn->cd_num_steps = 10; dbn->gd_learning_rate = 0.01; dbn->train(features); // draw 1000 samples from the DBN CDenseFeatures<float64_t>* samples = dbn->sample(100,1000); SGMatrix<float64_t> samples_matrix = samples->get_feature_matrix(); // compute the sample means SGVector<float64_t> samples_means = CStatistics::matrix_mean(samples_matrix, false); // compute the average difference between the sample means and the true means float64_t avg_diff = 0; for (int32_t i=0; i<num_features; i++) 
avg_diff += CMath::abs(means[i]-samples_means[i]); avg_diff /= num_features; SG_SINFO("Average difference = %f\n", avg_diff); // Clean up SG_UNREF(dbn); SG_UNREF(features); SG_UNREF(samples); exit_shogun(); return 0; }
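Going by the surrounding comments, the dbn->sample(100, 1000) call above presumably takes the number of Gibbs sampling steps as its first argument and the number of samples to draw as its second. With a well-trained DBN the per-feature means of those samples should approach the true means used to generate the training data, which is exactly what the average difference printed above measures.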
#include <cstdio> #include <shogun/optimization/lbfgs/lbfgs.h> static lbfgsfloatval_t evaluate( void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step ) { int i; lbfgsfloatval_t fx = 0.0; for (i = 0;i < n;i += 2) { lbfgsfloatval_t t1 = 1.0 - x[i]; lbfgsfloatval_t t2 = 10.0 * (x[i+1] - x[i] * x[i]); g[i+1] = 20.0 * t2; g[i] = -2.0 * (x[i] * g[i+1] + t1); fx += t1 * t1 + t2 * t2; } return fx; } static int progress( void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls ) { printf("Iteration %d:\n", k); printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]); printf(" xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step); printf("\n"); return 0; } #define N 100 int main(int argc, char *argv[]) { int i, ret = 0; lbfgsfloatval_t fx; lbfgsfloatval_t *x = lbfgs_malloc(N); lbfgs_parameter_t param; if (x == NULL) { printf("ERROR: Failed to allocate a memory block for variables.\n"); return 1; } /* Initialize the variables. */ for (i = 0;i < N;i += 2) { x[i] = -1.2; x[i+1] = 1.0; } /* Initialize the parameters for the L-BFGS optimization. */ lbfgs_parameter_init(&param); /*param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;*/ /* Start the L-BFGS optimization; this will invoke the callback functions evaluate() and progress() when necessary. */ ret = lbfgs(N, x, &fx, evaluate, progress, NULL, &param); /* Report the result. */ printf("L-BFGS optimization terminated with status code = %d\n", ret); printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]); lbfgs_free(x); return 0; }
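The evaluate() callback above implements the generalized Rosenbrock function, f(x) = sum over even i of (1 - x_i)^2 + 100*(x_{i+1} - x_i^2)^2, whose minimum f = 0 lies at x_i = 1 for all i. The analytic gradient that L-BFGS requires follows directly: with t1 = 1 - x_i and t2 = 10*(x_{i+1} - x_i^2) as in the code, df/dx_{i+1} = 200*(x_{i+1} - x_i^2) = 20*t2 and df/dx_i = -400*x_i*(x_{i+1} - x_i^2) - 2*(1 - x_i) = -2*(x_i*g_{i+1} + t1), matching the two assignments to g[].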
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { /* number of features and their dimension */ const int32_t n=6; init_shogun(&print_message); /* create some random data */ SGMatrix<float64_t> matrix(n,n); for(int32_t i=0; i<n*n; ++i) matrix.matrix[i]=CMath::random((float64_t)-n,(float64_t)n); SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n); /* create n n-dimensional feature vectors */ CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix); /* create gaussian kernel with cache 10MB, width will be changed later */ CGaussianKernel* kernel = new CGaussianKernel(10, 2.0); kernel->init(features, features); /* create n labels (+1,-1,+1,-1,...) */ CBinaryLabels* labels=new CBinaryLabels(n); for (int32_t i=0; i<n; ++i) labels->set_label(i, i%2==0 ? +1 : -1); /* create libsvm with C=10 and produced labels */ CLibSVM* svm=new CLibSVM(10, kernel, labels); /* iterate over different width parameters */ for (int32_t k=0; k<10; ++k) { float64_t width=CMath::pow(2.0,k); float64_t log_width=CMath::log(width/2.0)/2.0; /* create parameter to change current kernel width */ Parameter* param=new Parameter(); param->add(&log_width, "log_width", ""); /* tell kernel to use the newly produced parameter */ kernel->m_parameters->set_from_parameters(param); SG_SPRINT("\n\ncurrent kernel width: 2^%d=%f\n", k, kernel->get_width()); /* print kernel matrix */ for (int32_t i=0; i<n; i++) { for (int32_t j=0; j<n; j++) SG_SPRINT("%f ", kernel->kernel(i,j)); SG_SPRINT("\n"); } /* train and classify */ svm->train(); for (int32_t i=0; i<n; ++i) SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i, svm->apply_one(i), i, labels->get_label(i)); delete param; } /* free up memory */ SG_UNREF(svm); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/labels/BinaryLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } /* number of features and their dimension, number of kernels */ int main(int argc, char** argv) { const int32_t n=7; init_shogun(&print_message); /* create some random data and hand it to each kernel */ SGMatrix<float64_t> matrix(n,n); for (int32_t k=0; k<n*n; ++k) matrix.matrix[k]=CMath::random((float64_t) -n, (float64_t) n); SG_SPRINT("feature data:\n"); SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n); CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix); /* create n kernels with n features each */ CGaussianKernel** kernels=SG_MALLOC(CGaussianKernel*, n); for (int32_t i=0; i<n; ++i) { kernels[i]=new CGaussianKernel(10, CMath::random(0.0, (float64_t)n*n)); /* hand data to kernel */ kernels[i]->init(features, features); } /* create n parameter instances, each with one kernel */ Parameter** parameters=SG_MALLOC(Parameter*, n); for (int32_t i=0; i<n; ++i) { parameters[i]=new Parameter(); parameters[i]->add((CSGObject**)&kernels[i], "kernel", ""); } /* create n labels (+1,-1,+1,-1,...) */ CBinaryLabels* labels=new CBinaryLabels(n); for (int32_t i=0; i<n; ++i) labels->set_label(i, i%2==0 ? +1 : -1); /* create libsvm with C=10 and produced labels */ CLibSVM* svm=new CLibSVM(10, NULL, labels); /* iterate over all parameter instances and set them as subkernel */ for (int32_t k=0; k<n; ++k) { SG_SPRINT("\nkernel %d has width %f\n", k, kernels[k]->get_width()); /* change kernel, old one is UNREF'ed, new one is REF'ed */ svm->m_parameters->set_from_parameters(parameters[k]); /* train and classify with the different kernels */ svm->train(); for (int32_t i=0; i<n; ++i) SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i, svm->apply_one(i), i, labels->get_label(i)); } /* free up memory: delete all Parameter instances */ for (int32_t i=0; i<n; ++i) delete parameters[i]; /* delete created arrays */ SG_FREE(kernels); SG_FREE(parameters); /* this also handles features, labels, and last kernel in kernels[n-1] */ SG_UNREF(svm); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/lib/config.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/classifier/svm/LibLinear.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/PowerKernel.h> #include <shogun/distance/MinkowskiMetric.h> #include <shogun/lib/SGStringList.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_modsel_parameters(CSGObject* object) { SGStringList<char> modsel_params=object->get_modelsel_names(); SG_SPRINT("Parameters of %s available for model selection:\n", object->get_name()); char* type_string=SG_MALLOC(char, 100); for (index_t i=0; i<modsel_params.num_strings; ++i) { /* extract current name, description and type, and print them */ const char* name=modsel_params.strings[i].string; index_t index=object->get_modsel_param_index(name); TSGDataType type=object->m_model_selection_parameters->get_parameter( index)->m_datatype; type.to_string(type_string, 100); SG_SPRINT("\"%s\": \"%s\", %s\n", name, object->get_modsel_param_descr(name), type_string); } SG_FREE(type_string); SG_SPRINT("\n"); } int main(int argc, char** argv) { init_shogun(&print_message); #ifdef HAVE_LAPACK CSGObject* object; object=new CLibSVM(); print_modsel_parameters(object); SG_UNREF(object); object=new CLibLinear(); print_modsel_parameters(object); SG_UNREF(object); object=new CDistantSegmentsKernel(); print_modsel_parameters(object); SG_UNREF(object); object=new CGaussianKernel(); print_modsel_parameters(object); SG_UNREF(object); object=new CPowerKernel(); print_modsel_parameters(object); SG_UNREF(object); object=new CMinkowskiMetric(); print_modsel_parameters(object); SG_UNREF(object); #endif // HAVE_LAPACK exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/io/SGIO.h> #include <shogun/mathematics/Math.h> #include <shogun/base/Parameter.h> #include <shogun/kernel/string/DistantSegmentsKernel.h> #include <shogun/kernel/GaussianKernel.h> using namespace shogun; int32_t max=3; const float64_t initial_value=1; const float64_t another_value=2; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } bool test_float_scalar() { bool result=true; Parameter* original_parameter_list=new Parameter(); float64_t original_parameter=initial_value; original_parameter_list->add(&original_parameter, "param", ""); float64_t new_parameter=another_value; Parameter* new_parameter_list=new Parameter(); new_parameter_list->add(&new_parameter, "param", ""); original_parameter_list->set_from_parameters(new_parameter_list); result&=original_parameter==another_value; delete original_parameter_list; delete new_parameter_list; return result; } bool test_float_vector() { bool result=true; Parameter* original_parameter_list=new Parameter(); float64_t* original_parameter=SG_MALLOC(float64_t, max); SGVector<float64_t>::fill_vector(original_parameter, max, initial_value); original_parameter_list->add_vector(&original_parameter, &max, "param", ""); float64_t* new_parameter=SG_MALLOC(float64_t, max); SGVector<float64_t>::fill_vector(new_parameter, max, another_value); Parameter* new_parameter_list=new Parameter(); new_parameter_list->add_vector(&new_parameter, &max, "param", ""); original_parameter_list->set_from_parameters(new_parameter_list); for (int32_t i=0; i<max; ++i) result&=original_parameter[i]==another_value; /* arrays were allocated with SG_MALLOC, so free them with SG_FREE */ SG_FREE(original_parameter); SG_FREE(new_parameter); delete original_parameter_list; delete new_parameter_list; return result; } bool test_float_matrix() { bool result=true; Parameter* original_parameter_list=new Parameter(); float64_t* original_parameter=SG_MALLOC(float64_t, max*max); SGVector<float64_t>::fill_vector(original_parameter, max*max, initial_value); original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", ""); float64_t* new_parameter=SG_MALLOC(float64_t, max*max); SGVector<float64_t>::fill_vector(new_parameter, max*max, another_value); Parameter* new_parameter_list=new Parameter(); new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", ""); original_parameter_list->set_from_parameters(new_parameter_list); for (int32_t i=0; i<max*max; ++i) result&=original_parameter[i]==another_value; /* arrays were allocated with SG_MALLOC, so free them with SG_FREE */ SG_FREE(original_parameter); SG_FREE(new_parameter); delete original_parameter_list; delete new_parameter_list; return result; } bool test_sgobject_scalar() { bool result=true; Parameter* original_parameter_list=new Parameter(); CSGObject* original_parameter=new CGaussianKernel(10, 10); SG_REF(original_parameter); original_parameter_list->add(&original_parameter, "kernel", ""); CSGObject* new_parameter=new CDistantSegmentsKernel(10, 10, 10); Parameter* new_parameter_list=new Parameter(); new_parameter_list->add(&new_parameter, "kernel", ""); /* note: old_parameter is SG_UNREF'ed, new one SG_REF'ed */
original_parameter_list->set_from_parameters(new_parameter_list); result&=original_parameter==new_parameter; /* old original kernel was deleted by shogun's SG_UNREF */ SG_UNREF(new_parameter); delete original_parameter_list; delete new_parameter_list; return result; } bool test_sgobject_vector() { bool result=true; Parameter* original_parameter_list=new Parameter(); CSGObject** original_parameter=SG_MALLOC(CSGObject*, max); for (int32_t i=0; i<max; ++i) { original_parameter[i]=new CDistantSegmentsKernel(1, 1, 1); SG_REF(original_parameter[i]); } original_parameter_list->add_vector(&original_parameter, &max, "param", ""); CSGObject** new_parameter=SG_MALLOC(CSGObject*, max); for (int32_t i=0; i<max; ++i) new_parameter[i]=new CDistantSegmentsKernel(2, 2, 2); Parameter* new_parameter_list=new Parameter(); new_parameter_list->add_vector(&new_parameter, &max, "param", ""); /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */ original_parameter_list->set_from_parameters(new_parameter_list); for (int32_t i=0; i<max; ++i) result&=original_parameter[i]==new_parameter[i]; /* old original kernels were deleted by shogun's SG_UNREF; the pointer arrays were allocated with SG_MALLOC, so free them with SG_FREE */ SG_FREE(original_parameter); for (int32_t i=0; i<max; ++i) SG_UNREF(new_parameter[i]); SG_FREE(new_parameter); delete original_parameter_list; delete new_parameter_list; return result; } bool test_sgobject_matrix() { bool result=true; Parameter* original_parameter_list=new Parameter(); CSGObject** original_parameter=SG_MALLOC(CSGObject*, max*max); for (int32_t i=0; i<max; ++i) { for (int32_t j=0; j<max; ++j) { original_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1); SG_REF(original_parameter[j*max+i]); } } original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", ""); CSGObject** new_parameter=SG_MALLOC(CSGObject*, max*max); for (int32_t i=0; i<max; ++i) { for (int32_t j=0; j<max; ++j) new_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1); } Parameter* new_parameter_list=new Parameter(); new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", ""); /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */ original_parameter_list->set_from_parameters(new_parameter_list); for (int32_t i=0; i<max; ++i) { for (int32_t j=0; j<max; ++j) result&=original_parameter[j*max+i]==new_parameter[j*max+i]; } /* old original kernels were deleted by shogun's SG_UNREF; the pointer arrays were allocated with SG_MALLOC, so free them with SG_FREE */ SG_FREE(original_parameter); for (int32_t i=0; i<max*max; ++i) SG_UNREF(new_parameter[i]); SG_FREE(new_parameter); delete original_parameter_list; delete new_parameter_list; return result; } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); bool result=true; /* test whether set_from_parameters works for these types */ result&=test_float_scalar(); result&=test_sgobject_scalar(); result&=test_sgobject_vector(); result&=test_sgobject_matrix(); result&=test_float_matrix(); result&=test_float_vector(); if (result) SG_SPRINT("SUCCESS!\n") else SG_SPRINT("FAILURE!\n") exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2014 Abhijeet Kislay * Copyright (C) 2014 Abhijeet Kislay */ #include <shogun/base/init.h> #include <shogun/lib/config.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/preprocessor/FisherLDA.h> #include <shogun/features/DenseFeatures.h> #include <shogun/lib/common.h> #include <shogun/features/DataGenerator.h> using namespace shogun; #define NUM 50 #define DIMS 2 #define CLASSES 2 void test() { SGVector<float64_t> lab(CLASSES*NUM); SGMatrix<float64_t> feat(DIMS, CLASSES*NUM); feat=CDataGenerator::generate_gaussians(NUM,CLASSES,DIMS); for(int i=0; i<CLASSES; ++i) for(int j=0; j<NUM; ++j) lab[i*NUM+j]=double(i); // Create train labels CMulticlassLabels* labels=new CMulticlassLabels(lab); SG_REF(labels) // Create train features CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(feat); SG_REF(features) // Instantiate the FisherLDA preprocessor CFisherLDA* fisherlda=new CFisherLDA(AUTO_FLDA); SG_REF(fisherlda) fisherlda->fit(features, labels, 1); SGMatrix<float64_t> y=fisherlda->apply_to_feature_matrix(features); // display output y.display_matrix(); SG_UNREF(fisherlda) SG_UNREF(features) SG_UNREF(labels) } int main(int argc, char ** argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
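A note on the trailing argument of fit() above: Fisher LDA can yield at most CLASSES-1 discriminant directions, so for this two-class problem the 1 passed in (presumably the target dimensionality) is the natural choice. apply_to_feature_matrix() then returns the training data projected onto that single direction, which is what display_matrix() prints.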
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2008-2010 Soeren Sonnenburg, Alexander Binder * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society * Copyright (C) 2010 Berlin Institute of Technology */ #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/LinearKernel.h> #include <shogun/preproc/RandomFourierGaussPreproc.h> #include <shogun/features/DenseFeatures.h> #include <shogun/classifier/svm/LibSVM.h> #include <shogun/lib/Mathematics.h> #include <shogun/lib/common.h> #include <shogun/base/init.h> #include <stdlib.h> #include <stdio.h> #include <vector> #include <iostream> #include <algorithm> #include <ctime> using namespace shogun; void gen_rand_data(float64_t* & feat, float64_t* & lab,const int32_t num,const int32_t dims,const float64_t dist) { lab=SG_MALLOC(float64_t, num); feat=SG_MALLOC(float64_t, num*dims); for (int32_t i=0; i<num; i++) { if (i<num/2) { lab[i]=-1.0; for (int32_t j=0; j<dims; j++) feat[i*dims+j]=CMath::random(0.0,1.0)+dist; } else { lab[i]=1.0; for (int32_t j=0; j<dims; j++) feat[i*dims+j]=CMath::random(0.0,1.0)-dist; } } CMath::display_vector(lab,num); CMath::display_matrix(feat,dims, num); } int main() { time_t a,b; int32_t dims=6000; float64_t dist=0.5; int32_t randomfourier_featurespace_dim=500; // the typical application of the below preprocessor is cases with high input dimensionalities of some thousands int32_t numtr=3000; int32_t numte=3000; const int32_t feature_cache=0; const int32_t kernel_cache=0; // important trick for RFgauss to work: the kernel width is set such that the average inner kernel distance is close to one // the rfgauss approximation breaks down if the average inner kernel distances are too large (i.e. the kernel width is too small compared to the variance of the data) // try rbf_width=0.1 to see how it fails!
// you will see the problem in the large number of negative kernel entries (numnegratio) for the rfgauss linear kernel const float64_t rbf_width=4000; const float64_t svm_C=10; const float64_t svm_eps=0.001; init_shogun(); float64_t* feattr(NULL); float64_t* labtr(NULL); a=time(NULL); std::cout << "generating train data"<<std::endl; gen_rand_data(feattr,labtr,numtr,dims,dist); float64_t* feattr2=SG_MALLOC(float64_t, numtr*dims); std::copy(feattr,feattr+numtr*dims,feattr2); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; float64_t* featte(NULL); float64_t* labte(NULL); a=time(NULL); std::cout << "generating test data"<<std::endl; gen_rand_data(featte,labte,numte,dims,dist); float64_t* featte2=SG_MALLOC(float64_t, numte*dims); std::copy(featte,featte+numte*dims,featte2); float64_t* featte3=SG_MALLOC(float64_t, numte*dims); std::copy(featte,featte+numte*dims,featte3); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // create train labels CLabels* labelstr=new CLabels(); labelstr->set_labels(labtr, numtr); SG_REF(labelstr); // create train features a=time(NULL); std::cout << "initializing shogun train feature"<<std::endl; CDenseFeatures<float64_t>* featurestr1 = new CDenseFeatures<float64_t>(feature_cache); SG_REF(featurestr1); featurestr1->set_feature_matrix(feattr, dims, numtr); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // create gaussian kernel // std::cout << "computing gaussian train kernel"<<std::endl; CGaussianKernel* kerneltr1 = new CGaussianKernel(kernel_cache, rbf_width); SG_REF(kerneltr1); kerneltr1->init(featurestr1, featurestr1); // create svm via libsvm and train CLibSVM* svm1 = new CLibSVM(svm_C, kerneltr1, labelstr); SG_REF(svm1); svm1->set_epsilon(svm_eps); a=time(NULL); std::cout << "training SVM over gaussian kernel"<<std::endl; svm1->train(); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; printf("num_sv:%d b:%f\n", svm1->get_num_support_vectors(), svm1->get_bias()); a=time(NULL); std::cout << "initializing shogun test feature"<<std::endl; CDenseFeatures<float64_t>* featureste1 = new CDenseFeatures<float64_t>(feature_cache); SG_REF(featureste1); featureste1->set_feature_matrix(featte, dims, numte); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; //std::cout << "computing gaussian test kernel"<<std::endl; CGaussianKernel* kernelte1 = new CGaussianKernel(kernel_cache, rbf_width); SG_REF(kernelte1); kernelte1->init(featurestr1, featureste1); svm1->set_kernel(kernelte1); a=time(NULL); std::cout << "scoring gaussian test kernel"<<std::endl; std::vector<float64_t> scoreste1(numte); float64_t err1=0; for(int32_t i=0; i< numte ;++i) { scoreste1[i]=svm1->classify_example(i); if(scoreste1[i]*labte[i]<0) { err1+=1.0/numte; } } std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // *************************************** // now WITH the preprocessor a=time(NULL); std::cout << "initializing preprocessor"<<std::endl; CRandomFourierGaussPreproc *rfgauss=new CRandomFourierGaussPreproc; SG_REF(rfgauss); rfgauss->get_io()->set_loglevel(MSG_DEBUG); // ************************************************************ // set parameters of the preprocessor // ************************************************************
// important: the preprocessor expects the kernel width as CMath::sqrt(rbf_width/2.0) rfgauss->set_kernelwidth( CMath::sqrt(rbf_width/2.0) ); rfgauss->set_dim_input_space(dims); rfgauss->set_dim_feature_space(randomfourier_featurespace_dim); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // create train features a=time(NULL); std::cout << "initializing shogun train feature again"<<std::endl; CDenseFeatures<float64_t>* featurestr2 = new CDenseFeatures<float64_t>(feature_cache); SG_REF(featurestr2); featurestr2->set_feature_matrix(feattr2, dims, numtr); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // ************************************************************ // use preprocessor // ************************************************************** // add preprocessor featurestr2->add_preproc(rfgauss); // apply preprocessor a=time(NULL); std::cout << "applying preprocessor to train feature"<<std::endl; featurestr2->apply_preproc(); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // save random coefficients and state data of preprocessor for use with a new preprocessor object (see lines following "// now the same with a new preprocessor to show the usage of set_randomcoefficients") // Alternative: use built-in serialization to load and save state data from/to a file!!! float64_t *randomcoeff_additive2, * randomcoeff_multiplicative2; int32_t dim_feature_space2,dim_input_space2; float64_t kernelwidth2; rfgauss->get_randomcoefficients(&randomcoeff_additive2, &randomcoeff_multiplicative2, &dim_feature_space2, &dim_input_space2, &kernelwidth2); // create linear kernel //std::cout << "computing linear train kernel over preprocessed features"<<std::endl; CLinearKernel* kerneltr2 = new CLinearKernel(); SG_REF(kerneltr2); kerneltr2->init(featurestr2, featurestr2); // create svm via libsvm and train CLibSVM* svm2 = new CLibSVM(svm_C, kerneltr2, labelstr); SG_REF(svm2); svm2->set_epsilon(svm_eps); a=time(NULL); std::cout << "training SVM over linear kernel over preprocessed features"<<std::endl; svm2->train(); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; printf("num_sv:%d b:%f\n", svm2->get_num_support_vectors(), svm2->get_bias()); a=time(NULL); std::cout << "initializing shogun test feature again"<<std::endl; CDenseFeatures<float64_t>* featureste2 = new CDenseFeatures<float64_t>(feature_cache); SG_REF(featureste2); featureste2->set_feature_matrix(featte2, dims, numte); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // ************************************************************ // use preprocessor // ************************************************************** CRandomFourierGaussPreproc *rfgauss2=new CRandomFourierGaussPreproc; SG_REF(rfgauss2); rfgauss2->get_io()->set_loglevel(MSG_DEBUG); // add preprocessor featureste2->add_preproc(rfgauss); // apply preprocessor a=time(NULL); std::cout << "applying same preprocessor to test feature"<<std::endl; featureste2->apply_preproc(); std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; //std::cout << "computing linear test kernel over preprocessed features"<<std::endl; CLinearKernel* kernelte2 = new CLinearKernel(); SG_REF(kernelte2); kernelte2->init(featurestr2, featureste2); //std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl;
svm2->set_kernel(kernelte2); a=time(NULL); std::cout << "scoring linear test kernel over preprocessed features"<<std::endl; std::vector<float64_t> scoreste2(numte); float64_t err2=0; for(int32_t i=0; i< numte ;++i) { scoreste2[i]=svm2->classify_example(i); if(scoreste2[i]*labte[i]<0) { err2+=1.0/numte; } } std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; std::cout << "pausing 12 seconds"<<std::endl; sleep(12); // ************************************************************ // compare results // ************************************************************** int32_t num_labeldiffs=0; float64_t avg_scorediff=0; for(int32_t i=0; i< numte ;++i) { if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste2[i])) { ++num_labeldiffs; } avg_scorediff+=CMath::abs(scoreste1[i]-scoreste2[i])/numte; std::cout<< "at sample i="<< i <<" label 1= " << CMath::sign(scoreste1[i]) <<" label 2= " << CMath::sign(scoreste2[i])<< " scorediff " << scoreste1[i] << " - " <<scoreste2[i] <<" = " << CMath::abs(scoreste1[i]-scoreste2[i])<<std::endl; } std::cout << "used width for rbf kernel "<< kerneltr1->get_width() << " " << kernelte1->get_width()<<std::endl; std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl; std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl; std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err2<<std::endl; a=time(NULL); std::cout << "computing effective kernel widths (means of inner distances)"<<std::endl; int32_t m, n; float64_t * kertr1; kerneltr1->get_kernel_matrix ( &kertr1, &m, &n); std::cout << "kernel size "<< m << " "<< n <<std::endl; float64_t avgdist1=0; for(int i=0; i<m ;++i) { for(int l=0; l<i ;++l) { avgdist1+= -CMath::log(kertr1[i+l*m])*2.0/m/(m+1.0); } } float64_t * kertr2; kerneltr2->get_kernel_matrix (&kertr2,&m, &n); float64_t avgdist2=0; float64_t numnegratio=0; for(int i=0; i<m ;++i) { for(int l=0; l<i ;++l) { if(kertr2[i+l*m]<=0) { numnegratio+=2.0/m/(m+1.0); } else { avgdist2+= -CMath::log(std::max(kertr2[i+l*m],1e-10))*2.0/m/(m+1.0); } } } std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; std::cout << "effective kernel width for gaussian kernel and RFgauss "<< avgdist1 << " " <<avgdist2/(1.0-numnegratio) << std::endl<< " numnegratio (negative entries in RFgauss approx kernel)"<< numnegratio<<std::endl; // ********************************************** // now the same with a new preprocessor to show the usage of set_randomcoefficients // ********************************************** CDenseFeatures<float64_t>* featureste3 = new CDenseFeatures<float64_t>(feature_cache); SG_REF(featureste3); featureste3->set_feature_matrix(featte3, dims, numte); std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; // ************************************************************ // use preprocessor // ************************************************************** rfgauss2->set_randomcoefficients( randomcoeff_additive2, randomcoeff_multiplicative2, dim_feature_space2, dim_input_space2, kernelwidth2); // add preprocessor featureste3->add_preproc(rfgauss2); // apply preprocessor a=time(NULL); std::cout << "applying same preprocessor to test feature"<<std::endl; featureste3->apply_preproc(); std::cout << "finished"<<std::endl;
<< "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; //std::cout << "computing linear test kernel over preprocessed features"<<std::endl; CLinearKernel* kernelte3 = new CLinearKernel(); SG_REF(kernelte3); kernelte2->init(featurestr2, featureste3); //std::cout << "finished"<<std::endl; //b=time(NULL); //std::cout<< "elapsed time in seconds "<<b-a <<std::endl; svm2->set_kernel(kernelte3); a=time(NULL); std::cout << "scoring linear test kernel over preprocessed features"<<std::endl; std::vector<float64_t> scoreste3(numte); float64_t err3=0; for(int32_t i=0; i< numte ;++i) { scoreste3[i]=svm2->classify_example(i); if(scoreste3[i]*labte[i]<0) { err3+=1.0/numte; } } std::cout << "finished"<<std::endl; b=time(NULL); std::cout<< "elapsed time in seconds "<<b-a <<std::endl; std::cout << "pausing 12 seconds"<<std::endl; sleep(12); // ************************************************************ // compare results // ************************************************************** num_labeldiffs=0; avg_scorediff=0; for(int32_t i=0; i< numte ;++i) { if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste3[i])) { ++num_labeldiffs; } avg_scorediff+=CMath::abs(scoreste1[i]-scoreste3[i])/numte; std::cout<< "at sample i"<< i <<" label 1= " << CMath::sign(scoreste1[i]) <<" label 2= " << CMath::sign(scoreste3[i])<< " scorediff " << scoreste1[i] << " - " <<scoreste3[i] <<" = " << CMath::abs(scoreste1[i]-scoreste3[i])<<std::endl; } std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl; std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl; std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err3<<std::endl; SG_FREE(randomcoeff_additive2); SG_FREE(randomcoeff_multiplicative2); SG_FREE(labtr); SG_FREE(labte); SG_FREE(kertr1); SG_FREE(kertr2); SG_UNREF(labelstr); SG_UNREF(kerneltr1); SG_UNREF(kerneltr2); SG_UNREF(kernelte1); SG_UNREF(kernelte2); SG_UNREF(kernelte3); SG_UNREF(featurestr1); SG_UNREF(featurestr2); SG_UNREF(featureste1); SG_UNREF(featureste2); SG_UNREF(featureste3); SG_UNREF(svm1); SG_UNREF(svm2); SG_UNREF(rfgauss); SG_UNREF(rfgauss2); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Shashwat Lal Das * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society * * This example demonstrates use of the random conditional probability tree for online multiclass learning. */ #include <shogun/lib/common.h> #include <shogun/io/StreamingAsciiFile.h> #include <shogun/features/StreamingDenseFeatures.h> #include <shogun/multiclass/tree/RandomConditionalProbabilityTree.h> using namespace shogun; int main() { init_shogun_with_defaults(); const char* train_file_name = "../data/7class_example4_train.dense"; const char* test_file_name = "../data/7class_example4_test.dense"; CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name); SG_REF(train_file); CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024); SG_REF(train_features); CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree(); cpt->set_num_passes(1); cpt->set_features(train_features); cpt->train(); cpt->print_tree(); CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name); SG_REF(test_file); CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024); SG_REF(test_features); CMulticlassLabels *pred = cpt->apply_multiclass(test_features); test_features->reset_stream(); SG_SPRINT("num_labels = %d\n", pred->get_num_labels()); SG_UNREF(test_features); SG_UNREF(test_file); test_file = new CStreamingAsciiFile(test_file_name); SG_REF(test_file); test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024); SG_REF(test_features); CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels()); test_features->start_parser(); for (int32_t i=0; i < pred->get_num_labels(); ++i) { test_features->get_next_example(); gnd->set_int_label(i, test_features->get_label()); test_features->release_example(); } test_features->end_parser(); int32_t n_correct = 0; for (index_t i=0; i < pred->get_num_labels(); ++i) { if (pred->get_int_label(i) == gnd->get_int_label(i)) n_correct++; //SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i)); } SG_SPRINT("\n"); SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels()); SG_UNREF(gnd); SG_UNREF(train_features); SG_UNREF(test_features); SG_UNREF(train_file); SG_UNREF(test_file); SG_UNREF(cpt); SG_UNREF(pred); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Evangelos Anagnostopoulos * Copyright (C) 2013 Evangelos Anagnostopoulos * * This example demonstrates the use of the Random Fourier Dot Features with * a linear classifier. */ #include <shogun/base/init.h> #include <shogun/features/RandomFourierDotFeatures.h> #include <shogun/classifier/svm/LibLinear.h> #include <shogun/classifier/svm/SVMOcas.h> #include <shogun/labels/BinaryLabels.h> #include <shogun/evaluation/PRCEvaluation.h> using namespace shogun; void load_data(int32_t num_dim, int32_t num_vecs, CDenseFeatures<float64_t>*& feats, CBinaryLabels*& labels) { SGMatrix<float64_t> mat(num_dim, num_vecs); SGVector<float64_t> labs(num_vecs); for (index_t i=0; i<num_vecs; i++) { for (index_t j=0; j<num_dim; j++) { if ((i+j)%2==0) { labs[i] = -1; mat(j,i) = CMath::random(0,1) + 0.5; } else { labs[i] = 1; mat(j,i) = CMath::random(0,1) - 0.5; } } } feats = new CDenseFeatures<float64_t>(mat); labels = new CBinaryLabels(labs); } int main(int argc, char** argv) { init_shogun_with_defaults(); int32_t num_dim = 100; int32_t num_vecs = 10000; CDenseFeatures<float64_t>* dense_feats = 0; CBinaryLabels* labels = 0; load_data(num_dim, num_vecs, dense_feats, labels); /** Specifying the kernel parameter for the Gaussian approximation of RFFeatures, * as specified in its documentation in KernelName. * We set the kernel width of the Gaussian kernel we are approximating to 8. */ SGVector<float64_t> params(1); params[0] = 8; /** Specifying the number of samples for the RFFeatures */ int32_t D = 300; /** Creating a new RandomFourierDotFeatures object, that will work on * the data that we created before, will use D number of samples and * will generate parameters for a Gaussian Kernel approximation of * width given in params */ CRandomFourierDotFeatures* rf_feats = new CRandomFourierDotFeatures( dense_feats, D, KernelName::GAUSSIAN, params); /** Now the previous RFFeatures object can be used with a linear * classifier */ //CLibLinear* lin_svm = new CLibLinear(C, rf_feats, labels); float64_t C = 0.1; float64_t epsilon = 0.01; CSVMOcas* lin_svm = new CSVMOcas(C, rf_feats, labels); lin_svm->set_epsilon(epsilon); lin_svm->train(); CBinaryLabels* predicted = CLabelsFactory::to_binary(lin_svm->apply()); CPRCEvaluation* evaluator = new CPRCEvaluation(); float64_t auPRC = evaluator->evaluate(predicted, labels); SG_SPRINT("Training auPRC = %f\n", auPRC); SG_UNREF(evaluator); SG_UNREF(lin_svm); SG_UNREF(predicted); exit_shogun(); return 0; }
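For context, random Fourier features approximate a shift-invariant kernel k(x,y) by an explicit map z(x) = sqrt(2/D) * (cos(w_1'x + b_1), ..., cos(w_D'x + b_D)), with the w_i drawn from the kernel's Fourier transform, so that z(x)'z(y) ~= k(x,y) (Rahimi and Recht, 2007). Larger D (300 above) tightens the approximation at the cost of more computation per example, which is the trade-off this example exploits by pairing the features with a fast linear solver.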
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Jacob Walker */ #include <shogun/lib/config.h> // temporally disabled, since API was changed #if defined(HAVE_NLOPT) && 0 #include <shogun/base/init.h> #include <shogun/labels/RegressionLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/kernel/LinearARDKernel.h> #include <shogun/mathematics/Math.h> #include <shogun/machine/gp/ExactInferenceMethod.h> #include <shogun/machine/gp/GaussianLikelihood.h> #include <shogun/machine/gp/ZeroMean.h> #include <shogun/regression/GaussianProcessRegression.h> #include <shogun/evaluation/GradientEvaluation.h> #include <shogun/modelselection/GradientModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/evaluation/GradientCriterion.h> using namespace shogun; int32_t num_vectors=4; int32_t dim_vectors=3; void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train, CRegressionLabels* labels) { /*Fill Matrices with random nonsense*/ train[0] = -1; train[1] = -1; train[2] = -1; train[3] = 1; train[4] = 1; train[5] = 1; train[6] = -10; train[7] = -10; train[8] = -10; train[9] = 3; train[10] = 2; train[11] = 1; for (int32_t i=0; i<num_vectors*dim_vectors; i++) test[i]=i*sin(i)*.96; /* create labels, two classes */ for (index_t i=0; i<num_vectors; ++i) { if(i%2 == 0) labels->set_label(i, 1); else labels->set_label(i, -1); } } CModelSelectionParameters* build_tree(CInferenceMethod* inf, CLikelihoodModel* lik, CKernel* kernel, SGVector<float64_t>& weights) { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1 = new CModelSelectionParameters("inference_method", inf); root->append_child(c1); CModelSelectionParameters* c2 = new CModelSelectionParameters("likelihood_model", lik); c1->append_child(c2); CModelSelectionParameters* c3=new CModelSelectionParameters("sigma"); c2->append_child(c3); c3->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c4=new CModelSelectionParameters("scale"); c1->append_child(c4); c4->build_values(1.0, 1.0, R_LINEAR); CModelSelectionParameters* c5 = new CModelSelectionParameters("kernel", kernel); c1->append_child(c5); CModelSelectionParameters* c6 = new CModelSelectionParameters("weights"); c5->append_child(c6); c6->build_values_sgvector(0.001, 4.0, R_LINEAR, &weights); return root; } int main(int argc, char **argv) { init_shogun_with_defaults(); /* create some data and labels */ SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors); SGVector<float64_t> weights(dim_vectors); SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors); CRegressionLabels* labels=new CRegressionLabels(num_vectors); build_matrices(matrix2, matrix, labels); /* create training features */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(matrix); /* create testing features */ CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> (); features2->set_feature_matrix(matrix2); SG_REF(features); SG_REF(features2); SG_REF(labels); /*Allocate our Kernel*/ CLinearARDKernel* test_kernel = new CLinearARDKernel(10); test_kernel->init(features, features); /*Allocate our mean function*/ CZeroMean* mean = new CZeroMean(); /*Allocate 
our likelihood function*/ CGaussianLikelihood* lik = new CGaussianLikelihood(); /*Allocate our inference method*/ CExactInferenceMethod* inf = new CExactInferenceMethod(test_kernel, features, mean, labels, lik); SG_REF(inf); /*Finally use these to allocate the Gaussian Process Object*/ CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf); SG_REF(gp); /*Build the parameter tree for model selection*/ CModelSelectionParameters* root = build_tree(inf, lik, test_kernel, weights); /*Criterion for gradient search*/ CGradientCriterion* crit = new CGradientCriterion(); /*This will evaluate our inference method for its derivatives*/ CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels, crit); grad->set_function(inf); gp->print_modsel_params(); root->print_tree(); /* handles all of the above structures in memory */ CGradientModelSelection* grad_search=new CGradientModelSelection( root, grad); /* set autolocking to false to get rid of warnings */ grad->set_autolock(false); /*Search for best parameters*/ CParameterCombination* best_combination=grad_search->select_model(true); /*Output all the results and information*/ if (best_combination) { SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(gp); } CGradientResult* result=(CGradientResult*)grad->evaluate(); if(result->get_result_type() != GRADIENTEVALUATION_RESULT) SG_SERROR("Evaluation result not a GradientEvaluationResult!"); result->print_result(); SGVector<float64_t> alpha = inf->get_alpha(); SGVector<float64_t> labe = labels->get_labels(); SGVector<float64_t> diagonal = inf->get_diagonal_vector(); SGMatrix<float64_t> cholesky = inf->get_cholesky(); CRegressionLabels* predictions=gp->apply_regression(features); SGVector<float64_t> variance_vector=gp->get_variance_vector(features); alpha.display_vector("Alpha Vector"); labe.display_vector("Labels"); diagonal.display_vector("sW Matrix"); variance_vector.display_vector("Predicted Variances"); predictions->get_labels().display_vector("Mean Predictions"); cholesky.display_matrix("Cholesky Matrix L"); matrix.display_matrix("Training Features"); matrix2.display_matrix("Testing Features"); /*free memory*/ SG_UNREF(features); SG_UNREF(features2); SG_UNREF(predictions); SG_UNREF(labels); SG_UNREF(inf); SG_UNREF(gp); SG_UNREF(grad_search); SG_UNREF(best_combination); SG_UNREF(result); exit_shogun(); return 0; } #else int main(int argc, char **argv) { return 0; } #endif
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 */

#include <shogun/lib/config.h>

#ifdef USE_GPL_SHOGUN
// temporarily disabled, since API was changed
#if defined(HAVE_NLOPT) && 0

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/FITCInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;
int32_t dim_vectors=3;

void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
        CRegressionLabels* labels)
{
    /* fill matrices with random nonsense */
    train[0] = -1; train[1] = -1; train[2] = -1;
    train[3] = 1; train[4] = 1; train[5] = 1;
    train[6] = -10; train[7] = -10; train[8] = -10;
    train[9] = 3; train[10] = 2; train[11] = 1;

    for (int32_t i=0; i<num_vectors*dim_vectors; i++)
        test[i]=i*sin(i)*.96;

    /* create labels, two classes */
    for (index_t i=0; i<num_vectors; ++i)
    {
        if (i%2 == 0)
            labels->set_label(i, 1);
        else
            labels->set_label(i, -1);
    }
}

CModelSelectionParameters* build_tree(CInferenceMethod* inf,
        CLikelihoodModel* lik, CKernel* kernel)
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=
            new CModelSelectionParameters("inference_method", inf);
    root->append_child(c1);

    CModelSelectionParameters* c2=new CModelSelectionParameters("scale");
    c1->append_child(c2);
    c2->build_values(0.01, 4.0, R_LINEAR);

    CModelSelectionParameters* c3=
            new CModelSelectionParameters("likelihood_model", lik);
    c1->append_child(c3);

    CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
    c3->append_child(c4);
    c4->build_values(0.01, 4.0, R_LINEAR);

    CModelSelectionParameters* c5=
            new CModelSelectionParameters("kernel", kernel);
    c1->append_child(c5);

    CModelSelectionParameters* c6=new CModelSelectionParameters("width");
    c5->append_child(c6);
    c6->build_values(0.01, 4.0, R_LINEAR);

    return root;
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    /* create some data and labels */
    SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors);
    SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors);
    CRegressionLabels* labels=new CRegressionLabels(num_vectors);

    build_matrices(matrix2, matrix, labels);

    /* create training features */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    /* create testing features */
    CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t>();
    features2->set_feature_matrix(matrix2);

    SG_REF(labels);

    /* allocate our kernel */
    CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
    test_kernel->init(features, features);

    /* allocate our mean function */
    CZeroMean* mean = new CZeroMean();

    /* allocate our likelihood function */
    CGaussianLikelihood* lik = new CGaussianLikelihood();

    //SG_SPRINT("features2 bef inf rc= %d\n",features2->ref_count());

    /* allocate our inference method */
    CFITCInferenceMethod* inf=new CFITCInferenceMethod(test_kernel,
            features, mean, labels, lik, features2);

    //SG_SPRINT("features2 aft inf rc= %d\n",features2->ref_count());

    SG_REF(inf);

    /* finally use these to allocate the Gaussian Process Object */
    CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf);
    SG_REF(gp);

    /* build the parameter tree for model selection */
    CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

    /* criterion for gradient search */
    CGradientCriterion* crit = new CGradientCriterion();

    /* this will evaluate our inference method for its derivatives */
    CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
            crit);
    grad->set_function(inf);

    gp->print_modsel_params();
    root->print_tree();

    /* handles all of the above structures in memory */
    CGradientModelSelection* grad_search=new CGradientModelSelection(
            root, grad);

    /* set autolocking to false to get rid of warnings */
    grad->set_autolock(false);

    /* search for best parameters */
    CParameterCombination* best_combination=grad_search->select_model(true);

    /* output all the results and information */
    if (best_combination)
    {
        SG_SPRINT("best parameter(s):\n");
        best_combination->print_tree();
        best_combination->apply_to_machine(gp);
    }

    CGradientResult* result=(CGradientResult*)grad->evaluate();

    if (result->get_result_type() != GRADIENTEVALUATION_RESULT)
        SG_SERROR("Evaluation result not a GradientEvaluationResult!");

    result->print_result();

    SGVector<float64_t> alpha = inf->get_alpha();
    SGVector<float64_t> labe = labels->get_labels();
    SGVector<float64_t> diagonal = inf->get_diagonal_vector();
    SGMatrix<float64_t> cholesky = inf->get_cholesky();

    CRegressionLabels* predictions=gp->apply_regression(features);
    SGVector<float64_t> variance_vector=gp->get_variance_vector(features);

    alpha.display_vector("Alpha Vector");
    labe.display_vector("Labels");
    diagonal.display_vector("sW Matrix");
    variance_vector.display_vector("Predicted Variances");
    predictions->get_labels().display_vector("Mean Predictions");
    cholesky.display_matrix("Cholesky Matrix L");
    matrix.display_matrix("Training Features");
    matrix2.display_matrix("Testing Features");

    /* free memory */
    SG_UNREF(predictions);
    SG_UNREF(labels);
    SG_UNREF(inf);
    SG_UNREF(gp);
    SG_UNREF(grad_search);
    SG_UNREF(best_combination);
    SG_UNREF(result);

    exit_shogun();
    return 0;
}

#else
int main(int argc, char **argv)
{
    return 0;
}
#endif
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 */

#include <shogun/lib/config.h>

// temporarily disabled, since API was changed
#if defined(HAVE_NLOPT) && 0

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/LaplacianInferenceMethod.h>
#include <shogun/machine/gp/StudentsTLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;
int32_t dim_vectors=3;

void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
        CRegressionLabels* labels)
{
    /* fill matrices with random nonsense */
    train[0] = -1; train[1] = -1; train[2] = -1;
    train[3] = 1; train[4] = 1; train[5] = 1;
    train[6] = -10; train[7] = -10; train[8] = -10;
    train[9] = 3; train[10] = 2; train[11] = 1;

    for (int32_t i=0; i<num_vectors*dim_vectors; i++)
        test[i]=i*sin(i)*.96;

    /* create labels, two classes */
    for (index_t i=0; i<num_vectors; ++i)
    {
        if (i%2 == 0)
            labels->set_label(i, 1);
        else
            labels->set_label(i, -1);
    }
}

CModelSelectionParameters* build_tree(CInferenceMethod* inf,
        CLikelihoodModel* lik, CKernel* kernel)
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=
            new CModelSelectionParameters("inference_method", inf);
    root->append_child(c1);

    CModelSelectionParameters* c2=new CModelSelectionParameters("scale");
    c1->append_child(c2);
    c2->build_values(0.5, 4.0, R_LINEAR);

    CModelSelectionParameters* c3=
            new CModelSelectionParameters("likelihood_model", lik);
    c1->append_child(c3);

    CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
    c3->append_child(c4);
    c4->build_values(0.01, 4.0, R_LINEAR);

    CModelSelectionParameters* c43=new CModelSelectionParameters("df");
    c3->append_child(c43);
    c43->build_values(500.0, 1000.0, R_LINEAR);

    CModelSelectionParameters* c5=
            new CModelSelectionParameters("kernel", kernel);
    c1->append_child(c5);

    CModelSelectionParameters* c6=new CModelSelectionParameters("width");
    c5->append_child(c6);
    c6->build_values(0.01, 4.0, R_LINEAR);

    return root;
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    /* create some data and labels */
    SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors);
    SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors);
    CRegressionLabels* labels=new CRegressionLabels(num_vectors);

    build_matrices(matrix2, matrix, labels);

    /* create training features */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    /* create testing features */
    CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t>();
    features2->set_feature_matrix(matrix2);

    SG_REF(features);
    SG_REF(features2);
    SG_REF(labels);

    /* allocate our kernel */
    CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
    test_kernel->init(features, features);

    /* allocate our mean function */
    CZeroMean* mean = new CZeroMean();

    /* allocate our likelihood function */
    CStudentsTLikelihood* lik = new CStudentsTLikelihood();

    /* allocate our inference method */
    CLaplacianInferenceMethod* inf=new CLaplacianInferenceMethod(test_kernel,
            features, mean, labels, lik);
    SG_REF(inf);

    /* finally use these to allocate the Gaussian Process Object */
    CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf);
    SG_REF(gp);

    /* build the parameter tree for model selection */
    CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

    /* criterion for gradient search */
    CGradientCriterion* crit = new CGradientCriterion();

    /* this will evaluate our inference method for its derivatives */
    CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
            crit);
    grad->set_function(inf);

    gp->print_modsel_params();
    root->print_tree();

    /* handles all of the above structures in memory */
    CGradientModelSelection* grad_search=new CGradientModelSelection(
            root, grad);

    /* set autolocking to false to get rid of warnings */
    grad->set_autolock(false);

    /* search for best parameters */
    CParameterCombination* best_combination=grad_search->select_model(true);

    /* output all the results and information */
    if (best_combination)
    {
        SG_SPRINT("best parameter(s):\n");
        best_combination->print_tree();
        best_combination->apply_to_machine(gp);
    }

    CGradientResult* result=(CGradientResult*)grad->evaluate();

    if (result->get_result_type() != GRADIENTEVALUATION_RESULT)
        SG_SERROR("Evaluation result not a GradientEvaluationResult!");

    result->print_result();

    SGVector<float64_t> alpha = inf->get_alpha();
    SGVector<float64_t> labe = labels->get_labels();
    SGVector<float64_t> diagonal = inf->get_diagonal_vector();
    SGMatrix<float64_t> cholesky = inf->get_cholesky();

    CRegressionLabels* predictions=gp->apply_regression(features);
    SGVector<float64_t> variance_vector=gp->get_variance_vector(features);

    alpha.display_vector("Alpha Vector");
    labe.display_vector("Labels");
    diagonal.display_vector("sW Matrix");
    variance_vector.display_vector("Predicted Variances");
    predictions->get_labels().display_vector("Mean Predictions");
    cholesky.display_matrix("Cholesky Matrix L");
    matrix.display_matrix("Training Features");
    matrix2.display_matrix("Testing Features");

    /* free memory */
    SG_UNREF(features);
    SG_UNREF(features2);
    SG_UNREF(predictions);
    SG_UNREF(labels);
    SG_UNREF(inf);
    SG_UNREF(gp);
    SG_UNREF(grad_search);
    SG_UNREF(best_combination);
    SG_UNREF(result);

    exit_shogun();
    return 0;
}

#else
int main(int argc, char **argv)
{
    return 0;
}
#endif
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 */

#include <shogun/lib/config.h>

#ifdef USE_GPL_SHOGUN
#if defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
#include <shogun/kernel/ProductKernel.h>

using namespace shogun;

int32_t num_vectors=4;
int32_t dim_vectors=3;

void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
        CRegressionLabels* labels)
{
    /* fill matrices with random nonsense */
    train[0] = -1; train[1] = -1; train[2] = -1;
    train[3] = 1; train[4] = 1; train[5] = 1;
    train[6] = -10; train[7] = -10; train[8] = -10;
    train[9] = 3; train[10] = 2; train[11] = 1;

    for (int32_t i=0; i<num_vectors*dim_vectors; i++)
        test[i]=i*sin(i)*.96;

    /* create labels, two classes */
    for (index_t i=0; i<num_vectors; ++i)
    {
        if (i%2 == 0)
            labels->set_label(i, 1);
        else
            labels->set_label(i, -1);
    }
}

/* HEIKO FIXME
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
        CLikelihoodModel* lik, CProductKernel* kernel)
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=
            new CModelSelectionParameters("inference_method", inf);
    root->append_child(c1);

    CModelSelectionParameters* c2=new CModelSelectionParameters("scale");
    c1->append_child(c2);
    c2->build_values(0.99, 1.01, R_LINEAR);

    CModelSelectionParameters* c3=
            new CModelSelectionParameters("likelihood_model", lik);
    c1->append_child(c3);

    CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
    c3->append_child(c4);
    c4->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c5=
            new CModelSelectionParameters("kernel", kernel);
    c1->append_child(c5);

    CList* list = kernel->get_list();
    CModelSelectionParameters* cc1=
            new CModelSelectionParameters("kernel_list", list);
    c5->append_child(cc1);

    CListElement* first = NULL;
    CSGObject* k = list->get_first_element(first);
    SG_UNREF(k);
    SG_REF(first);

    CModelSelectionParameters* cc2=
            new CModelSelectionParameters("first", first);
    cc1->append_child(cc2);

    CKernel* sub_kernel1 = kernel->get_kernel(0);
    CModelSelectionParameters* cc3=
            new CModelSelectionParameters("data", sub_kernel1);
    cc2->append_child(cc3);
    SG_UNREF(sub_kernel1);

    CListElement* second = first;
    k = list->get_next_element(second);
    SG_UNREF(k);
    SG_REF(second);

    CModelSelectionParameters* cc4=
            new CModelSelectionParameters("next", second);
    cc2->append_child(cc4);

    CKernel* sub_kernel2 = kernel->get_kernel(1);
    CModelSelectionParameters* cc5=
            new CModelSelectionParameters("data", sub_kernel2);
    cc4->append_child(cc5);
    SG_UNREF(sub_kernel2);

    CListElement* third = second;
    k = list->get_next_element(third);
    SG_UNREF(k);
    SG_REF(third);

    CModelSelectionParameters* cc6=
            new CModelSelectionParameters("next", third);
    cc4->append_child(cc6);

    CKernel* sub_kernel3 = kernel->get_kernel(2);
    CModelSelectionParameters* cc7=
            new CModelSelectionParameters("data", sub_kernel3);
    cc6->append_child(cc7);
    SG_UNREF(sub_kernel3);

    CModelSelectionParameters* c6=new CModelSelectionParameters("width");
    cc3->append_child(c6);
    c6->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c66=
            new CModelSelectionParameters("combined_kernel_weight");
    cc3->append_child(c66);
    c66->build_values(0.001, 1.0, R_LINEAR);

    CModelSelectionParameters* c7=new CModelSelectionParameters("width");
    cc5->append_child(c7);
    c7->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c77=
            new CModelSelectionParameters("combined_kernel_weight");
    cc5->append_child(c77);
    c77->build_values(0.001, 1.0, R_LINEAR);

    CModelSelectionParameters* c8=new CModelSelectionParameters("width");
    cc7->append_child(c8);
    c8->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c88=
            new CModelSelectionParameters("combined_kernel_weight");
    cc7->append_child(c88);
    c88->build_values(0.001, 1.0, R_LINEAR);

    SG_UNREF(list);

    return root;
}
*/

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    /* create some data and labels */
    SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors);
    SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors);
    CRegressionLabels* labels=new CRegressionLabels(num_vectors);

    build_matrices(matrix2, matrix, labels);

    /* create training features */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    CCombinedFeatures* comb_features=new CCombinedFeatures();
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);

    CProductKernel* test_kernel = new CProductKernel();
    CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2);
    CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2);
    CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2);

    test_kernel->append_kernel(sub_kernel1);
    test_kernel->append_kernel(sub_kernel2);
    test_kernel->append_kernel(sub_kernel3);

    SG_REF(comb_features);
    SG_REF(labels);

    /* allocate our mean function */
    CZeroMean* mean = new CZeroMean();

    /* allocate our likelihood model */
    CGaussianLikelihood* lik = new CGaussianLikelihood();

    /* allocate our inference method */
    CExactInferenceMethod* inf=new CExactInferenceMethod(test_kernel,
            comb_features, mean, labels, lik);
    SG_REF(inf);

    /* finally use these to allocate the Gaussian Process Object */
    CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf);
    SG_REF(gp);

    //CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
    //
    ///* criterion for gradient search */
    //CGradientCriterion* crit = new CGradientCriterion();

    ///* this will evaluate our inference method for its derivatives */
    //CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features,
    //        labels, crit);
    //grad->set_function(inf);

    //gp->print_modsel_params();
    //root->print_tree();

    ///* handles all of the above structures in memory */
    //CGradientModelSelection* grad_search=new CGradientModelSelection(
    //        root, grad);

    ///* set autolocking to false to get rid of warnings */
    //grad->set_autolock(false);

    ///* search for best parameters */
    //CParameterCombination* best_combination=grad_search->select_model(true);

    ///* output all the results and information */
    //if (best_combination)
    //{
    //    SG_SPRINT("best parameter(s):\n");
    //    best_combination->print_tree();
    //    best_combination->apply_to_machine(gp);
    //}

    //CGradientResult* result=(CGradientResult*)grad->evaluate();

    //if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
    //    SG_SERROR("Evaluation result not a GradientEvaluationResult!");

    //result->print_result();

    //SGVector<float64_t> alpha = inf->get_alpha();
    //SGVector<float64_t> labe = labels->get_labels();
    //SGVector<float64_t> diagonal = inf->get_diagonal_vector();
    //SGMatrix<float64_t> cholesky = inf->get_cholesky();

    //gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
    //CRegressionLabels* covariance = gp->apply_regression(comb_features);
    //gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
    //
    //CRegressionLabels* predictions = gp->apply_regression();

    //alpha.display_vector("Alpha Vector");
    //labe.display_vector("Labels");
    //diagonal.display_vector("sW Matrix");
    //covariance->get_labels().display_vector("Predicted Variances");
    //predictions->get_labels().display_vector("Mean Predictions");
    //cholesky.display_matrix("Cholesky Matrix L");
    //matrix.display_matrix("Training Features");
    //matrix2.display_matrix("Testing Features");

    ///* free memory */
    //SG_UNREF(predictions);
    //SG_UNREF(covariance);
    SG_UNREF(labels);
    SG_UNREF(comb_features);
    SG_UNREF(inf);
    SG_UNREF(gp);
    //SG_UNREF(grad_search);
    //SG_UNREF(best_combination);
    //SG_UNREF(result);

    exit_shogun();
    return 0;
}

#else
int main(int argc, char **argv)
{
    return 0;
}
#endif
#else //USE_GPL_SHOGUN
int main(int argc, char **argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Heiko Strathmann
 */

#include <shogun/lib/config.h>

#ifdef USE_GPL_SHOGUN

#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/base/init.h>

using namespace shogun;

void test()
{
    /* create some easy regression data: 1d noisy sine wave */
    index_t n=100;
    float64_t x_range=6;

    SGMatrix<float64_t> X(1, n);
    SGMatrix<float64_t> X_test(1, n);
    SGVector<float64_t> Y(n);

    for (index_t i=0; i<n; ++i)
    {
        X[i]=CMath::random(0.0, x_range);
        X_test[i]=(float64_t)i / n*x_range;
        Y[i]=CMath::sin(X[i]);
    }

    /* shogun representation */
    CDenseFeatures<float64_t>* feat_train=new CDenseFeatures<float64_t>(X);
    CDenseFeatures<float64_t>* feat_test=new CDenseFeatures<float64_t>(X_test);
    CRegressionLabels* label_train=new CRegressionLabels(Y);

    /* specify GPR with exact inference */
    float64_t sigma=1;
    float64_t shogun_sigma=sigma*sigma*2;
    CGaussianKernel* kernel=new CGaussianKernel(10, shogun_sigma);
    CZeroMean* mean=new CZeroMean();
    CGaussianLikelihood* lik=new CGaussianLikelihood();
    lik->set_sigma(1);
    CExactInferenceMethod* inf=new CExactInferenceMethod(kernel, feat_train,
            mean, label_train, lik);
    CGaussianProcessRegression* gpr=new CGaussianProcessRegression(inf);

    /* perform inference */
    CRegressionLabels* predictions=gpr->apply_regression(feat_test);
    predictions->get_labels().display_vector("predictions");

    SG_UNREF(predictions);
    SG_UNREF(gpr);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    test();
    exit_shogun();
    return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
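The example above only prints the predictive means. As a possible extension (not part of the original example), the predictions could be scored against the noise-free sine with CMeanSquaredError, just as the LibSVR example further below evaluates its test error. A minimal sketch, assuming the additional header shogun/evaluation/MeanSquaredError.h; it would go at the end of test(), before predictions is unreferenced:

    /* hypothetical extension: score the GP predictions against the clean sine */
    SGVector<float64_t> Y_test(n);
    for (index_t i=0; i<n; ++i)
        Y_test[i]=CMath::sin(X_test[i]);

    CRegressionLabels* label_test=new CRegressionLabels(Y_test);
    CEvaluation* eval=new CMeanSquaredError();
    SG_SPRINT("mean squared error: %f\n",
            eval->evaluate(predictions, label_test));
    SG_UNREF(eval);
    SG_UNREF(label_test);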
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 */

#include <shogun/lib/config.h>

#ifdef USE_GPL_SHOGUN
#if defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/machine/gp/ExactInferenceMethod.h>
#include <shogun/machine/gp/GaussianLikelihood.h>
#include <shogun/machine/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>
#include <shogun/kernel/CombinedKernel.h>

using namespace shogun;

int32_t num_vectors=4;
int32_t dim_vectors=3;

void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
        CRegressionLabels* labels)
{
    /* fill matrices with random nonsense */
    train[0] = -1; train[1] = -1; train[2] = -1;
    train[3] = 1; train[4] = 1; train[5] = 1;
    train[6] = -10; train[7] = -10; train[8] = -10;
    train[9] = 3; train[10] = 2; train[11] = 1;

    for (int32_t i=0; i<num_vectors*dim_vectors; i++)
        test[i]=i*sin(i)*.96;

    /* create labels, two classes */
    for (index_t i=0; i<num_vectors; ++i)
    {
        if (i%2 == 0)
            labels->set_label(i, 1);
        else
            labels->set_label(i, -1);
    }
}

/* HEIKO FIXME
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
        CLikelihoodModel* lik, CCombinedKernel* kernel)
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=
            new CModelSelectionParameters("inference_method", inf);
    root->append_child(c1);

    CModelSelectionParameters* c2=new CModelSelectionParameters("scale");
    c1->append_child(c2);
    c2->build_values(0.99, 1.01, R_LINEAR);

    CModelSelectionParameters* c3=
            new CModelSelectionParameters("likelihood_model", lik);
    c1->append_child(c3);

    CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
    c3->append_child(c4);
    c4->build_values(0.001, 1.0, R_LINEAR);

    CModelSelectionParameters* c5=
            new CModelSelectionParameters("kernel", kernel);
    c1->append_child(c5);

    CList* list = kernel->get_list();
    CModelSelectionParameters* cc1=
            new CModelSelectionParameters("kernel_list", list);
    c5->append_child(cc1);

    CListElement* first = NULL;
    CSGObject* k = list->get_first_element(first);
    SG_UNREF(k);
    SG_REF(first);

    CModelSelectionParameters* cc2=
            new CModelSelectionParameters("first", first);
    cc1->append_child(cc2);

    CKernel* sub_kernel1 = kernel->get_kernel(0);
    CModelSelectionParameters* cc3=
            new CModelSelectionParameters("data", sub_kernel1);
    cc2->append_child(cc3);
    SG_UNREF(sub_kernel1);

    CListElement* second = first;
    k = list->get_next_element(second);
    SG_UNREF(k);
    SG_REF(second);

    CModelSelectionParameters* cc4=
            new CModelSelectionParameters("next", second);
    cc2->append_child(cc4);

    CKernel* sub_kernel2 = kernel->get_kernel(1);
    CModelSelectionParameters* cc5=
            new CModelSelectionParameters("data", sub_kernel2);
    cc4->append_child(cc5);
    SG_UNREF(sub_kernel2);

    CListElement* third = second;
    k = list->get_next_element(third);
    SG_UNREF(k);
    SG_REF(third);

    CModelSelectionParameters* cc6=
            new CModelSelectionParameters("next", third);
    cc4->append_child(cc6);

    CKernel* sub_kernel3 = kernel->get_kernel(2);
    CModelSelectionParameters* cc7=
            new CModelSelectionParameters("data", sub_kernel3);
    cc6->append_child(cc7);
    SG_UNREF(sub_kernel3);

    CModelSelectionParameters* c6=new CModelSelectionParameters("width");
    cc3->append_child(c6);
    c6->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c66=
            new CModelSelectionParameters("combined_kernel_weight");
    cc3->append_child(c66);
    c66->build_values(0.001, 1.0, R_LINEAR);

    CModelSelectionParameters* c7=new CModelSelectionParameters("width");
    cc5->append_child(c7);
    c7->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c77=
            new CModelSelectionParameters("combined_kernel_weight");
    cc5->append_child(c77);
    c77->build_values(0.001, 1.0, R_LINEAR);

    CModelSelectionParameters* c8=new CModelSelectionParameters("width");
    cc7->append_child(c8);
    c8->build_values(1.0, 4.0, R_LINEAR);

    CModelSelectionParameters* c88=
            new CModelSelectionParameters("combined_kernel_weight");
    cc7->append_child(c88);
    c88->build_values(0.001, 1.0, R_LINEAR);

    SG_UNREF(list);

    return root;
}
*/

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    /* create some data and labels */
    SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors);
    SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors);
    CRegressionLabels* labels=new CRegressionLabels(num_vectors);

    build_matrices(matrix2, matrix, labels);

    /* create training features */
    CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
    features->set_feature_matrix(matrix);

    CCombinedFeatures* comb_features=new CCombinedFeatures();
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);
    comb_features->append_feature_obj(features);

    CCombinedKernel* test_kernel = new CCombinedKernel();
    CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2);
    CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2);
    CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2);

    test_kernel->append_kernel(sub_kernel1);
    test_kernel->append_kernel(sub_kernel2);
    test_kernel->append_kernel(sub_kernel3);

    SG_REF(comb_features);
    SG_REF(labels);

    /* allocate our mean function */
    CZeroMean* mean = new CZeroMean();

    /* allocate our likelihood model */
    CGaussianLikelihood* lik = new CGaussianLikelihood();

    /* allocate our inference method */
    CExactInferenceMethod* inf=new CExactInferenceMethod(test_kernel,
            comb_features, mean, labels, lik);
    SG_REF(inf);

    /* finally use these to allocate the Gaussian Process Object */
    CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf);
    SG_REF(gp);

    //CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);
    //
    ///* criterion for gradient search */
    //CGradientCriterion* crit = new CGradientCriterion();

    ///* this will evaluate our inference method for its derivatives */
    //CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features,
    //        labels, crit);
    //grad->set_function(inf);

    //gp->print_modsel_params();
    //root->print_tree();

    ///* handles all of the above structures in memory */
    //CGradientModelSelection* grad_search=new CGradientModelSelection(
    //        root, grad);

    ///* set autolocking to false to get rid of warnings */
    //grad->set_autolock(false);

    ///* search for best parameters */
    //CParameterCombination* best_combination=grad_search->select_model(true);

    ///* output all the results and information */
    //if (best_combination)
    //{
    //    SG_SPRINT("best parameter(s):\n");
    //    best_combination->print_tree();
    //    best_combination->apply_to_machine(gp);
    //}

    //CGradientResult* result=(CGradientResult*)grad->evaluate();

    //if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
    //    SG_SERROR("Evaluation result not a GradientEvaluationResult!");

    //result->print_result();

    //SGVector<float64_t> alpha = inf->get_alpha();
    //SGVector<float64_t> labe = labels->get_labels();
    //SGVector<float64_t> diagonal = inf->get_diagonal_vector();
    //SGMatrix<float64_t> cholesky = inf->get_cholesky();

    //gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
    //CRegressionLabels* covariance = gp->apply_regression(comb_features);
    //gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
    //
    //CRegressionLabels* predictions = gp->apply_regression();

    //alpha.display_vector("Alpha Vector");
    //labe.display_vector("Labels");
    //diagonal.display_vector("sW Matrix");
    //covariance->get_labels().display_vector("Predicted Variances");
    //predictions->get_labels().display_vector("Mean Predictions");
    //cholesky.display_matrix("Cholesky Matrix L");
    //matrix.display_matrix("Training Features");
    //matrix2.display_matrix("Testing Features");

    ///* free memory */
    //SG_UNREF(predictions);
    //SG_UNREF(covariance);
    SG_UNREF(labels);
    SG_UNREF(comb_features);
    SG_UNREF(inf);
    SG_UNREF(gp);
    //SG_UNREF(grad_search);
    //SG_UNREF(best_combination);
    //SG_UNREF(result);

    exit_shogun();
    return 0;
}

#else // HAVE_NLOPT
int main(int argc, char **argv)
{
    return 0;
}
#endif // HAVE_NLOPT
#else //USE_GPL_SHOGUN
int main(int argc, char **argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/svr/LibSVR.h>
#include <shogun/evaluation/MeanSquaredError.h>

using namespace shogun;

void test_libsvr()
{
    const int32_t kernel_cache=0;
    const float64_t rbf_width=10;
    const float64_t svm_C=10;
    const float64_t svm_nu=0.01;

    /* create some easy regression data: 1d noisy sine wave */
    index_t n=100;
    float64_t x_range=6;

    SGMatrix<float64_t> feat_train(1, n);
    SGMatrix<float64_t> feat_test(1, n);
    SGVector<float64_t> lab_train(n);
    SGVector<float64_t> lab_test(n);

    for (index_t i=0; i<n; ++i)
    {
        feat_train[i]=CMath::random(0.0, x_range);
        feat_test[i]=(float64_t)i/n*x_range;
        lab_train[i]=CMath::sin(feat_train[i]);
        lab_test[i]=CMath::sin(feat_test[i]);
    }

    /* shogun representation */
    CLabels* labels_train=new CRegressionLabels(lab_train);
    CLabels* labels_test=new CRegressionLabels(lab_test);
    CDenseFeatures<float64_t>* features_train=new CDenseFeatures<float64_t>(
            feat_train);
    CDenseFeatures<float64_t>* features_test=new CDenseFeatures<float64_t>(
            feat_test);

    CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, rbf_width);
    kernel->init(features_train, features_train);

    // also epsilon svr possible here
    LIBSVR_SOLVER_TYPE st=LIBSVR_NU_SVR;
    CLibSVR* svm=new CLibSVR(svm_C, svm_nu, kernel, labels_train, st);
    svm->train();

    /* predict */
    CRegressionLabels* predicted_labels=CLabelsFactory::to_regression(
            svm->apply(features_test));

    /* evaluate */
    CEvaluation* eval=new CMeanSquaredError();
    SG_SPRINT("mean squared error: %f\n",
            eval->evaluate(predicted_labels, labels_test));

    /* clean up */
    SG_UNREF(eval);
    SG_UNREF(labels_test);
    SG_UNREF(predicted_labels);
    SG_UNREF(svm);
}

int main()
{
    init_shogun_with_defaults();
    // sg_io->set_loglevel(MSG_DEBUG);
    test_libsvr();
    exit_shogun();
    return 0;
}
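As the comment in the example notes, epsilon-SVR works here as well. A minimal variant (a sketch, not part of the original example; svm_eps is a hypothetical tube-width constant, and the second constructor argument is then interpreted as epsilon rather than nu):

    /* sketch: epsilon-SVR instead of nu-SVR, reusing the same kernel and labels */
    const float64_t svm_eps=0.1; // hypothetical tube width
    CLibSVR* eps_svm=new CLibSVR(svm_C, svm_eps, kernel, labels_train,
            LIBSVR_EPSILON_SVR);
    eps_svm->train();

    CRegressionLabels* eps_predicted=CLabelsFactory::to_regression(
            eps_svm->apply(features_test));
    /* ... evaluate exactly as above ... */
    SG_UNREF(eps_predicted);
    SG_UNREF(eps_svm);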
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/features/DenseFeatures.h>
#include <unistd.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

class CTestClass : public CSGObject
{
public:
    /* initialize members so the destructor is safe if this ctor is used */
    CTestClass() : m_number(0), m_features(NULL) {}

    CTestClass(float64_t number, float64_t vec_start, int32_t features_start)
    {
        m_number=number;

        m_vec=SGVector<float64_t>(10);
        SGVector<float64_t>::range_fill_vector(m_vec.vector, m_vec.vlen,
                vec_start);

        m_mat=SGMatrix<float64_t>(3,3);
        SGVector<float64_t>::range_fill_vector(m_mat.matrix,
                m_mat.num_cols*m_mat.num_rows, vec_start);

        SGMatrix<int32_t> data=SGMatrix<int32_t>(3, 2);
        SGVector<int32_t>::range_fill_vector(data.matrix,
                data.num_rows*data.num_cols, features_start);
        m_features=new CDenseFeatures<int32_t>(data);
        SG_REF(m_features);

        m_parameters->add(&m_number, "number", "Test variable");
        m_parameters->add(&m_mat, "mat", "Test variable");
        m_parameters->add(&m_vec, "vec", "Test variable");
        m_parameters->add((CSGObject**)&m_features, "features",
                "Test variable");
    }

    virtual ~CTestClass()
    {
        SG_UNREF(m_features);
    }

    void print()
    {
        SG_PRINT("m_number=%f\n", m_number);
        SGVector<float64_t>::display_vector(m_vec.vector, m_vec.vlen, "m_vec");
        SGVector<float64_t>::display_vector(m_mat.matrix,
                m_mat.num_cols*m_mat.num_rows, "m_mat");

        SGMatrix<int32_t> features=m_features->get_feature_matrix();
        SGMatrix<int32_t>::display_matrix(features.matrix, features.num_rows,
                features.num_cols, "m_features");
    }

    inline virtual const char* get_name() const { return "TestClass"; }

public:
    float64_t m_number;
    SGVector<float64_t> m_vec;
    SGMatrix<float64_t> m_mat;
    CDenseFeatures<int32_t>* m_features;
};

void test_test_class_serial()
{
    char filename_tmp[] = "serialization_test.XXXXXX";
    int fd = mkstemp(filename_tmp);
    ASSERT(fd != -1);
    int retval = close(fd);
    ASSERT(retval != -1);
    char* filename = filename_tmp;

    CTestClass* to_save=new CTestClass(10, 0, 0);
    CTestClass* to_load=new CTestClass(20, 10, 66);

    SG_SPRINT("original instance 1:\n");
    to_save->print();
    SG_SPRINT("original instance 2:\n");
    to_load->print();

    CSerializableAsciiFile* file;
    file=new CSerializableAsciiFile(filename, 'w');
    to_save->save_serializable(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableAsciiFile(filename, 'r');
    to_load->load_serializable(file);
    file->close();
    SG_UNREF(file);

    SG_SPRINT("deserialized instance 1 into instance 2: (should be equal to "
            "first instance)\n");
    to_load->print();

    /* assert that variable is equal */
    ASSERT(to_load->m_number==to_save->m_number);

    /* assert that vector is equal */
    for (index_t i=0; i<to_load->m_vec.vlen; ++i)
        ASSERT(to_load->m_vec[i]==to_save->m_vec[i]);

    /* assert that matrix is equal */
    for (index_t i=0; i<to_load->m_mat.num_cols*to_load->m_mat.num_rows; ++i)
        ASSERT(to_load->m_mat[i]==to_save->m_mat[i]);

    /* assert that features object is equal */
    SGMatrix<int32_t> features_loaded=
            to_load->m_features->get_feature_matrix();
    SGMatrix<int32_t> features_saved=
            to_save->m_features->get_feature_matrix();

    for (index_t i=0; i<features_loaded.num_rows*features_loaded.num_cols; ++i)
        ASSERT(features_loaded[i]==features_saved[i]);

    SG_UNREF(to_save);
    SG_UNREF(to_load);
    unlink(filename);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);
    test_test_class_serial();
    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/mathematics/Math.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/io/SerializableJsonFile.h>
#include <shogun/io/SerializableXmlFile.h>
#include <shogun/io/SerializableHdf5File.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

const char* filename="filename.txt";

void print(Parameter* p)
{
    TParameter* param=p->get_parameter(0);
    SGVector<float64_t>* v=(SGVector<float64_t>*)param->m_parameter;
    CMath::display_vector(v->vector, v->vlen, "vector:");

    param=p->get_parameter(1);
    SGMatrix<float64_t>* m=(SGMatrix<float64_t>*)param->m_parameter;
    CMath::display_matrix(m->matrix, m->num_rows, m->num_cols, "matrix:");
}

void check_content_equal(Parameter* save_param, Parameter* load_param)
{
    TParameter* p;

    p=save_param->get_parameter(0);
    SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter;
    p=save_param->get_parameter(1);
    SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter;

    p=load_param->get_parameter(0);
    SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter;
    p=load_param->get_parameter(1);
    SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter;

    ASSERT(sv->vlen==lv->vlen);
    ASSERT(sm->num_rows==lm->num_rows);
    ASSERT(sm->num_cols==lm->num_cols);

    for (index_t i=0; i<sv->vlen; ++i)
        ASSERT(sv->vector[i]==lv->vector[i]);

    for (index_t i=0; i<sm->num_cols*sm->num_rows; ++i)
        ASSERT(sm->matrix[i]==lm->matrix[i]);
}

void test_ascii(Parameter* save_param, Parameter* load_param)
{
    SG_SPRINT("testing ascii serialization\n");
    SG_SPRINT("to save:\n");
    print(save_param);
    SG_SPRINT("loaded before:\n");
    print(load_param);

    CSerializableAsciiFile* file;
    file=new CSerializableAsciiFile(filename, 'w');
    save_param->save(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableAsciiFile(filename, 'r');
    load_param->load(file);
    file->close();
    SG_UNREF(file);

    SG_SPRINT("loaded after:\n");
    print(load_param);

    check_content_equal(save_param, load_param);
}

void test_hdf5(Parameter* save_param, Parameter* load_param)
{
    /* TODO, HDF5 file leaks memory */
    SG_SPRINT("testing hdf5 serialization\n");
    SG_SPRINT("to save:\n");
    print(save_param);
    SG_SPRINT("loaded before:\n");
    print(load_param);

    CSerializableHdf5File* file;
    file=new CSerializableHdf5File(filename, 'w');
    save_param->save(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableHdf5File(filename, 'r');
    load_param->load(file);
    file->close();
    SG_UNREF(file);

    SG_SPRINT("loaded after:\n");
    print(load_param);

    check_content_equal(save_param, load_param);
}

void test_json(Parameter* save_param, Parameter* load_param)
{
    /* TODO, json file leaks memory, also save methods */
    SG_SPRINT("testing json serialization\n");
    SG_SPRINT("to save:\n");
    print(save_param);
    SG_SPRINT("loaded before:\n");
    print(load_param);

    CSerializableJsonFile* file;
    file=new CSerializableJsonFile(filename, 'w');
    save_param->save(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableJsonFile(filename, 'r');
    load_param->load(file);
    file->close();
    SG_UNREF(file);

    SG_SPRINT("loaded after:\n");
    print(load_param);

    check_content_equal(save_param, load_param);
}

void test_xml(Parameter* save_param, Parameter* load_param)
{
    /* TODO, xml file leaks memory and produces a read error */
    SG_SPRINT("testing xml serialization\n");
    SG_SPRINT("to save:\n");
    print(save_param);
    SG_SPRINT("loaded before:\n");
    print(load_param);

    CSerializableXmlFile* file;
    file=new CSerializableXmlFile(filename, 'w');
    save_param->save(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableXmlFile(filename, 'r');
    load_param->load(file);
    file->close();
    SG_UNREF(file);

    SG_SPRINT("loaded after:\n");
    print(load_param);

    check_content_equal(save_param, load_param);
}

void reset_values(Parameter* save_param, Parameter* load_param)
{
    TParameter* p;

    p=save_param->get_parameter(0);
    SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter;
    p=save_param->get_parameter(1);
    SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter;

    p=load_param->get_parameter(0);
    SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter;
    p=load_param->get_parameter(1);
    SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter;

    sv->destroy_vector();
    lv->destroy_vector();
    sm->destroy_matrix();
    lm->destroy_matrix();

    *sv=SGVector<float64_t>(9);
    *lv=SGVector<float64_t>(3);
    *sm=SGMatrix<float64_t>(3, 3);
    *lm=SGMatrix<float64_t>(4, 4);

    CMath::range_fill_vector(sv->vector, sv->vlen);
    CMath::range_fill_vector(sm->matrix, sm->num_rows*sm->num_cols);
    CMath::fill_vector(lv->vector, lv->vlen, 0.0);
    CMath::fill_vector(lm->matrix, lm->num_rows*lm->num_cols, 0.0);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* for serialization */
    SGVector<float64_t> sv;
    SGMatrix<float64_t> sm;
    Parameter* sp=new Parameter();
    sp->add(&sv, "vector", "description");
    sp->add(&sm, "matrix", "description");

    /* for deserialization */
    SGVector<float64_t> lv;
    SGMatrix<float64_t> lm;
    Parameter* lp=new Parameter();
    lp->add(&lv, "vector", "description");
    lp->add(&lm, "matrix", "description");

    /* still leaks memory TODO */
    reset_values(sp, lp);
    test_json(sp, lp);

    reset_values(sp, lp);
    test_ascii(sp, lp);

    /* still leaks memory TODO */
    reset_values(sp, lp);
    test_hdf5(sp, lp);

    /* still leaks memory TODO */
    reset_values(sp, lp);
    test_xml(sp, lp);

    /* clean up */
    sv.destroy_vector();
    sm.destroy_matrix();
    lv.destroy_vector();
    lm.destroy_matrix();
    delete sp;
    delete lp;

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>
#include <shogun/io/SerializableAsciiFile.h>

using namespace shogun;

void test()
{
    index_t n=10;
    index_t n_class=3;

    CMulticlassLabels* labels=new CMulticlassLabels();
    SGVector<float64_t> lab(n);
    for (index_t i=0; i<n; ++i)
        lab[i]=i%n_class;
    labels->set_labels(lab);

    labels->allocate_confidences_for(n_class);
    SGVector<float64_t> conf(n_class);
    for (index_t i=0; i<n_class; ++i)
        conf[i]=CMath::randn_double();

    for (index_t i=0; i<n; ++i)
        labels->set_multiclass_confidences(i, conf);

    /* create serialized copy */
    const char* filename="multiclass_labels.txt";
    CSerializableAsciiFile* file=new CSerializableAsciiFile(filename, 'w');
    labels->save_serializable(file);
    file->close();
    SG_UNREF(file);

    file=new CSerializableAsciiFile(filename, 'r');
    CMulticlassLabels* labels_loaded=new CMulticlassLabels();
    labels_loaded->load_serializable(file);
    file->close();
    SG_UNREF(file);

    /* compare */
    labels->get_labels().display_vector("labels");
    labels_loaded->get_labels().display_vector("labels_loaded");

    for (index_t i=0; i<n_class; ++i)
    {
        labels->get_multiclass_confidences(i).display_vector(
                "confidences");
        labels_loaded->get_multiclass_confidences(i).display_vector(
                "confidences_loaded");
    }

    SG_UNREF(labels_loaded);
    SG_UNREF(labels);
}

int main()
{
    init_shogun_with_defaults();
    // sg_io->set_loglevel(MSG_DEBUG);
    test();
    exit_shogun();
    return 0;
}
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGString.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/lib/DynamicObjectArray.h>
#include <shogun/structure/FactorGraph.h>
#include <shogun/structure/FactorType.h>
#include <shogun/structure/Factor.h>
#include <shogun/labels/FactorGraphLabels.h>
#include <shogun/structure/MAPInference.h>

using namespace shogun;

inline int grid_to_index(int32_t x, int32_t y, int32_t w = 10)
{
    return x + w*y;
}

inline void index_to_grid(int32_t index, int32_t& x, int32_t& y, int32_t w = 10)
{
    x = index % w;
    y = index / w;
}

void create_tree_graph(int hh, int ww)
{
    SGVector<int32_t> card(2);
    card[0] = 2;
    card[1] = 2;
    SGVector<float64_t> w(4);
    w[0] = 0.0; // 0,0
    w[1] = 0.5; // 1,0
    w[2] = 0.5; // 0,1
    w[3] = 0.0; // 1,1
    int32_t tid = 0;
    CTableFactorType* factortype = new CTableFactorType(tid, card, w);
    SG_REF(factortype);

    SGVector<int32_t> vc(hh*ww);
    SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, 2);

    CFactorGraph* fg = new CFactorGraph(vc);
    SG_REF(fg);

    // Add factors
    for (int32_t x = 0; x < ww; x++)
    {
        for (int32_t y = 0; y < hh; y++)
        {
            if (x > 0)
            {
                SGVector<float64_t> data;
                SGVector<int32_t> var_index(2);
                var_index[0] = grid_to_index(x,y,ww);
                var_index[1] = grid_to_index(x-1,y,ww);
                CFactor* fac1 = new CFactor(factortype, var_index, data);
                fg->add_factor(fac1);
            }

            if (x == 0 && y > 0)
            {
                SGVector<float64_t> data;
                SGVector<int32_t> var_index(2);
                var_index[0] = grid_to_index(x,y-1,ww);
                var_index[1] = grid_to_index(x,y,ww);
                CFactor* fac1 = new CFactor(factortype, var_index, data);
                fg->add_factor(fac1);
            }
        }
    }

    SG_UNREF(factortype);

    fg->connect_components();

    SG_SPRINT("is acyclic graph? %d\n", fg->is_acyclic_graph());
    SG_SPRINT("is connected graph? %d\n", fg->is_connected_graph());
    SG_SPRINT("is tree graph? %d\n", fg->is_tree_graph());
    SG_SPRINT("num of edges: %d\n", fg->get_num_edges());

    fg->compute_energies();

    CMAPInference infer_met(fg, TREE_MAX_PROD);
    infer_met.inference();

    CFactorGraphObservation* fg_observ = infer_met.get_structured_outputs();
    SGVector<int32_t> assignment = fg_observ->get_data();
    SG_UNREF(fg_observ);

    assignment.display_vector();

    SG_UNREF(fg);
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    //sg_io->set_loglevel(MSG_DEBUG);
    create_tree_graph(30, 30);
    exit_shogun();
    return 0;
}
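The factor graph above is a tree: the loop adds horizontal edges everywhere but vertical edges only in the leftmost column. A possible experiment (a sketch, not part of the original example) is to add the remaining vertical edges inside the same x/y loop, which turns the grid into a loopy graph; is_acyclic_graph() and is_tree_graph() then report false, and TREE_MAX_PROD no longer applies, so a loopy inference type, e.g. the GRAPH_CUT type used by the multilabel example further below, would be needed instead:

    /* sketch: the missing vertical edges, alongside the two existing cases */
    if (x > 0 && y > 0)
    {
        SGVector<float64_t> data;
        SGVector<int32_t> var_index(2);
        var_index[0] = grid_to_index(x,y-1,ww);
        var_index[1] = grid_to_index(x,y,ww);
        fg->add_factor(new CFactor(factortype, var_index, data));
    }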
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/Time.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/structure/FWSOSVM.h>
#include <shogun/structure/FactorType.h>
#include <shogun/structure/MAPInference.h>
#include <shogun/structure/FactorGraphModel.h>
#include <shogun/features/FactorGraphFeatures.h>
#include <shogun/labels/FactorGraphLabels.h>

using namespace shogun;

void test(int32_t num_samples)
{
    CMath::init_random(17);

    // define factor type
    SGVector<int32_t> card(2);
    card[0] = 2;
    card[1] = 2;
    SGVector<float64_t> w(8);
    w[0] = 0.3; // 0,0
    w[1] = 0.5; // 0,0
    w[2] = 1.0; // 1,0
    w[3] = 0.2; // 1,0
    w[4] = 0.05; // 0,1
    w[5] = 0.6; // 0,1
    w[6] = -0.2; // 1,1
    w[7] = 0.75; // 1,1
    int32_t tid = 0;
    CTableFactorType* factortype = new CTableFactorType(tid, card, w);
    SG_REF(factortype);

    // create features and labels
    CFactorGraphFeatures* instances = new CFactorGraphFeatures(num_samples);
    SG_REF(instances);
    CFactorGraphLabels* labels = new CFactorGraphLabels(num_samples);
    SG_REF(labels);

    for (int32_t n = 0; n < num_samples; ++n)
    {
        // factor graph
        SGVector<int32_t> vc(3);
        SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, 2);
        CFactorGraph* fg = new CFactorGraph(vc);

        // add factors
        SGVector<float64_t> data1(2);
        data1[0] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
        data1[1] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
        SGVector<int32_t> var_index1(2);
        var_index1[0] = 0;
        var_index1[1] = 1;
        CFactor* fac1 = new CFactor(factortype, var_index1, data1);
        fg->add_factor(fac1);

        SGVector<float64_t> data2(2);
        data2[0] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
        data2[1] = 2.0 * CMath::random(0.0, 1.0) - 1.0;
        SGVector<int32_t> var_index2(2);
        var_index2[0] = 1;
        var_index2[1] = 2;
        CFactor* fac2 = new CFactor(factortype, var_index2, data2);
        fg->add_factor(fac2);

        // add factor graph instance
        instances->add_sample(fg);

        fg->connect_components();
        fg->compute_energies();

        CMAPInference infer_met(fg, TREE_MAX_PROD);
        infer_met.inference();

        CFactorGraphObservation* fg_observ = infer_met.get_structured_outputs();

        // add ground truth states
        labels->add_label(fg_observ);
        SG_UNREF(fg_observ);
    }

#ifdef SHOW_DATA
    // show labels
    for (int32_t n = 0; n < num_samples; ++n)
    {
        CFactorGraphObservation* fg_observ =
                CFactorGraphObservation::obtain_from_generic(labels->get_label(n));
        SG_SPRINT("- sample %d:\n", n);
        SGVector<int32_t> fst = fg_observ->get_data();
        SGVector<int32_t>::display_vector(fst.vector, fst.vlen);
        SG_UNREF(fg_observ);
    }
#endif

    SG_SPRINT("----------------------------------------------------\n");

    CFactorGraphModel* model = new CFactorGraphModel(instances, labels,
            TREE_MAX_PROD, false);
    SG_REF(model);

    // initialize model parameters
    SGVector<float64_t> w_truth = w.clone();
    w.zero();
    factortype->set_w(w);
    model->add_factor_type(factortype);

// the Mosek solver is disabled in this example
#undef USE_MOSEK
#ifdef USE_MOSEK
    // create primal mosek solver
    CPrimalMosekSOSVM* primcp = new CPrimalMosekSOSVM(model, labels);
    SG_REF(primcp);
    primcp->set_regularization(0.01); // TODO: check 1000
#endif

    // create BMRM solver
    CDualLibQPBMSOSVM* bmrm = new CDualLibQPBMSOSVM(model, labels, 0.01);
    bmrm->set_verbose(false);
    SG_REF(bmrm);

    // create SGD solver
    CStochasticSOSVM* sgd = new CStochasticSOSVM(model, labels);
    sgd->set_num_iter(100);
    sgd->set_lambda(0.01);
    SG_REF(sgd);

    // create FW solver
    CFWSOSVM* fw = new CFWSOSVM(model, labels);
    fw->set_num_iter(100);
    fw->set_lambda(0.01);
    fw->set_gap_threshold(0.01);
    SG_REF(fw);

    // timer
    CTime start;
    float64_t t1 = start.cur_time_diff(false);

#ifdef USE_MOSEK
    // train PrimalMosek
    primcp->train();
    t1 = start.cur_time_diff(false);
#endif

    // train BMRM
    bmrm->train();
    float64_t t2 = start.cur_time_diff(false);

    // train SGD
    sgd->train();
    float64_t t3 = start.cur_time_diff(false);

    // train FW
    fw->train();
    float64_t t4 = start.cur_time_diff(false);

    SG_SPRINT(">>>> PrimalMosekSOSVM trained in %9.4f\n", t1);
    SG_SPRINT(">>>> BMRM trained in %9.4f\n", t2-t1);
    SG_SPRINT(">>>> SGD trained in %9.4f\n", t3-t2);
    SG_SPRINT(">>>> FW trained in %9.4f\n", t4-t3);

    // check w
#ifdef USE_MOSEK
    primcp->get_slacks().display_vector("slacks");
    primcp->get_w().display_vector("w_mosek");
#endif
    bmrm->get_w().display_vector("w_bmrm");
    sgd->get_w().display_vector("w_sgd");
    fw->get_w().display_vector("w_fw");
    w_truth.display_vector("w_truth");

#ifdef USE_MOSEK
    // Evaluation PrimalMosek
    CStructuredLabels* labels_primcp =
            CLabelsFactory::to_structured(primcp->apply());
    SG_REF(labels_primcp);

    float64_t acc_loss_primcp = 0.0;
    float64_t ave_loss_primcp = 0.0;

    for (int32_t i=0; i<num_samples; ++i)
    {
        CStructuredData* y_pred = labels_primcp->get_label(i);
        CStructuredData* y_truth = labels->get_label(i);
        acc_loss_primcp += model->delta_loss(y_truth, y_pred);
        SG_UNREF(y_pred);
        SG_UNREF(y_truth);
    }

    ave_loss_primcp = acc_loss_primcp / static_cast<float64_t>(num_samples);
    SG_SPRINT("primal mosek solver: average training loss = %f\n",
            ave_loss_primcp);
#endif

    // Evaluation BMRM
    CStructuredLabels* labels_bmrm =
            CLabelsFactory::to_structured(bmrm->apply());
    SG_REF(labels_bmrm);

    float64_t acc_loss_bmrm = 0.0;
    float64_t ave_loss_bmrm = 0.0;

    for (int32_t i=0; i<num_samples; ++i)
    {
        CStructuredData* y_pred = labels_bmrm->get_label(i);
        CStructuredData* y_truth = labels->get_label(i);
        acc_loss_bmrm += model->delta_loss(y_truth, y_pred);
        SG_UNREF(y_pred);
        SG_UNREF(y_truth);
    }

    ave_loss_bmrm = acc_loss_bmrm / static_cast<float64_t>(num_samples);
    SG_SPRINT("bmrm solver: average training loss = %f\n", ave_loss_bmrm);

    // Evaluation SGD
    CStructuredLabels* labels_sgd =
            CLabelsFactory::to_structured(sgd->apply());
    SG_REF(labels_sgd);

    float64_t acc_loss_sgd = 0.0;
    float64_t ave_loss_sgd = 0.0;

    for (int32_t i=0; i<num_samples; ++i)
    {
        CStructuredData* y_pred = labels_sgd->get_label(i);
        CStructuredData* y_truth = labels->get_label(i);
        acc_loss_sgd += model->delta_loss(y_truth, y_pred);
        SG_UNREF(y_pred);
        SG_UNREF(y_truth);
    }

    ave_loss_sgd = acc_loss_sgd / static_cast<float64_t>(num_samples);
    SG_SPRINT("sgd solver: average training loss = %f\n", ave_loss_sgd);

    // Evaluation FW
    CStructuredLabels* labels_fw =
            CLabelsFactory::to_structured(fw->apply());
    SG_REF(labels_fw);

    float64_t acc_loss_fw = 0.0;
    float64_t ave_loss_fw = 0.0;

    for (int32_t i=0; i<num_samples; ++i)
    {
        CStructuredData* y_pred = labels_fw->get_label(i);
        CStructuredData* y_truth = labels->get_label(i);
        acc_loss_fw += model->delta_loss(y_truth, y_pred);
        SG_UNREF(y_pred);
        SG_UNREF(y_truth);
    }

    ave_loss_fw = acc_loss_fw / static_cast<float64_t>(num_samples);
    SG_SPRINT("fw solver: average training loss = %f\n", ave_loss_fw);

#ifdef USE_MOSEK
    SG_UNREF(labels_primcp);
    SG_UNREF(primcp);
#endif
    SG_UNREF(labels_fw);
    SG_UNREF(labels_sgd);
    SG_UNREF(labels_bmrm);
    SG_UNREF(fw);
    SG_UNREF(sgd);
    SG_UNREF(bmrm);
    SG_UNREF(model);
    SG_UNREF(labels);
    SG_UNREF(instances);
    SG_UNREF(factortype);
}

int main(int argc, char * argv[])
{
    init_shogun_with_defaults();
    //sg_io->set_loglevel(MSG_DEBUG);
    test(100);
    exit_shogun();
    return 0;
}
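The four near-identical evaluation loops in this example could be collapsed into a single helper, in the style of the evaluate() function used by the multilabel example that follows. A sketch (average_loss is a hypothetical name, built only from calls that already appear above):

    /* sketch: average delta loss of a prediction against the ground truth */
    float64_t average_loss(CFactorGraphModel* model, CStructuredLabels* pred,
            CFactorGraphLabels* truth, int32_t num_samples)
    {
        float64_t acc_loss = 0.0;
        for (int32_t i=0; i<num_samples; ++i)
        {
            CStructuredData* y_pred = pred->get_label(i);
            CStructuredData* y_truth = truth->get_label(i);
            acc_loss += model->delta_loss(y_truth, y_pred);
            SG_UNREF(y_pred);
            SG_UNREF(y_truth);
        }
        return acc_loss / static_cast<float64_t>(num_samples);
    }

    // e.g. SG_SPRINT("bmrm solver: average training loss = %f\n",
    //         average_loss(model, labels_bmrm, labels, num_samples));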
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2014 Jiaolong Xu * Copyright (C) 2014 Jiaolong Xu */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/io/LibSVMFile.h> #include <shogun/lib/common.h> #include <shogun/lib/Time.h> #include <shogun/lib/DelimiterTokenizer.h> #include <shogun/lib/SGSparseVector.h> #include <shogun/base/DynArray.h> #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/structure/DualLibQPBMSOSVM.h> #include <shogun/structure/StochasticSOSVM.h> #include <shogun/structure/FactorType.h> #include <shogun/structure/MAPInference.h> #include <shogun/structure/FactorGraphModel.h> #include <shogun/features/FactorGraphFeatures.h> #include <shogun/labels/FactorGraphLabels.h> #include <shogun/structure/SOSVMHelper.h> using namespace shogun; #define NUM_STATUS 2 // each class has binary labels const char FNAME_TRAIN[] = "../../../../data/multilabel/scene_train"; const char FNAME_TEST[] = "../../../../data/multilabel/scene_test"; enum EGraphStructure { TREE = 0, // tree-structure graph FULL = 1 // full-connected graph }; struct MultilabelParameter { EGraphStructure graph_type; EMAPInferType infer_type; int32_t sgd_num_iter; float64_t sgd_lambda; MultilabelParameter() : graph_type(FULL), infer_type(GRAPH_CUT), sgd_num_iter(200), sgd_lambda(0.0001) {} MultilabelParameter(EGraphStructure graph, EMAPInferType infer, int32_t num_iter = 200, float64_t lambda = 0.0001) : graph_type(graph), infer_type(infer), sgd_num_iter(num_iter), sgd_lambda(lambda) {} ~MultilabelParameter() {} }; void read_data(const char * fname, SGMatrix<int32_t>& labels, SGMatrix<float64_t>& feats) { // sparse data from matrix CLibSVMFile * svmfile = new CLibSVMFile(fname); SGSparseVector<float64_t>* spv_feats; SGVector<float64_t>* pv_labels; int32_t dim_feat; int32_t num_samples; int32_t num_classes; svmfile->get_sparse_matrix(spv_feats, dim_feat, num_samples, pv_labels, num_classes); SG_SPRINT("Number of the samples: %d\n", num_samples); SG_SPRINT("Dimention of the feature: %d\n", dim_feat+1); SG_SPRINT("Number of classes: %d\n", num_classes); feats = SGMatrix<float64_t>(dim_feat+1, num_samples); labels = SGMatrix<int32_t>(num_classes, num_samples); feats.zero(); labels.zero(); for (int32_t i = 0; i < num_samples; i++) { SGVector<float64_t> v_feat = spv_feats[i].get_dense(); SGVector<float64_t> v_labels = pv_labels[i]; for (int32_t f = 0; f < v_feat.size(); f++) feats(f, i) = v_feat[f]; feats(dim_feat, i) = 1.0; // bias for (int32_t l = 0; l < v_labels.size(); l++) labels((int32_t)v_labels[l], i) = 1; } SG_UNREF(svmfile); SG_FREE(spv_feats); SG_FREE(pv_labels); } /** get tree-structured graph */ SGMatrix< int32_t > get_edges_tree() { SGMatrix< int32_t > label_tree_index; // A tree structure is defined by a 2-d matrix where // each row stores the indecies of a pair of connect factors // Define label tree structure label_tree_index = SGMatrix< int32_t > (5, 2); label_tree_index[0] = 0; label_tree_index[1] = 0; label_tree_index[2] = 1; label_tree_index[3] = 4; label_tree_index[4] = 2; label_tree_index[5] = 2; label_tree_index[6] = 3; label_tree_index[7] = 4; label_tree_index[8] = 5; label_tree_index[9] = 5; return label_tree_index; } /** get full-connected graph */ SGMatrix< int32_t > get_edges_full(const int32_t num_classes) { // A full-connected 
graph is defined by a 2-d matrix where // each row stores the indices of a pair of connected nodes int32_t num_rows = num_classes*(num_classes - 1)/2; ASSERT(num_rows > 0); SGMatrix< int32_t > mat(num_rows, 2); int32_t k = 0; for (int32_t i = 0; i < num_classes - 1; i++) { for (int32_t j = i + 1; j < num_classes; j++) { mat[num_rows + k] = j; mat[k++] = i; } } return mat; } /** Get graph structure * * @param graph_type tree structure or fully-connected graph * @param num_classes number of classes * * @return a matrix containing the indices of the pairwise edges*/ SGMatrix<int32_t> get_edge_list(EGraphStructure graph_type, int32_t num_classes) { SGMatrix<int32_t> mat; switch (graph_type) { case TREE: mat = get_edges_tree(); break; case FULL: mat = get_edges_full(num_classes); break; default: mat = get_edges_tree(); break; } return mat; } void build_factor_graph(MultilabelParameter param, SGMatrix<float64_t> feats, SGMatrix<int32_t> labels, CFactorGraphFeatures * fg_feats, CFactorGraphLabels * fg_labels, const DynArray<CTableFactorType *>& v_ftp_u, const DynArray<CTableFactorType *>& v_ftp_t) { int32_t num_sample = labels.num_cols; int32_t num_classes = labels.num_rows; int32_t dim = feats.num_rows; SGMatrix< int32_t > mat_edges = get_edge_list(param.graph_type, num_classes); int32_t num_edges = mat_edges.num_rows; // prepare features and labels in factor graph for (int32_t n = 0; n < num_sample; n++) { SGVector<int32_t> vc(num_classes); SGVector<int32_t>::fill_vector(vc.vector, vc.vlen, NUM_STATUS); CFactorGraph * fg = new CFactorGraph(vc); float64_t * pfeat = feats.get_column_vector(n); SGVector<float64_t> feat_i(dim); memcpy(feat_i.vector, pfeat, dim * sizeof(float64_t)); // add unary factors for (int32_t u = 0; u < num_classes; u++) { SGVector<int32_t> var_index_u(1); var_index_u[0] = u; CFactor * fac_u = new CFactor(v_ftp_u[u], var_index_u, feat_i); fg->add_factor(fac_u); } // add pairwise factors for (int32_t t = 0; t < num_edges; t++) { SGVector<float64_t> data_t(1); data_t[0] = 1.0; SGVector<int32_t> var_index_t = mat_edges.get_row_vector(t); CFactor * fac_t = new CFactor(v_ftp_t[t], var_index_t, data_t); fg->add_factor(fac_t); } // add factor graph instance fg_feats->add_sample(fg); // add label int32_t * plabs = labels.get_column_vector(n); SGVector<int32_t> states_gt(num_classes); memcpy(states_gt.vector, plabs, num_classes * sizeof(int32_t)); SGVector<float64_t> loss_weights(num_classes); SGVector<float64_t>::fill_vector(loss_weights.vector, loss_weights.vlen, 1.0/num_classes); CFactorGraphObservation * fg_obs = new CFactorGraphObservation(states_gt, loss_weights); fg_labels->add_label(fg_obs); } } void evaluate(CFactorGraphModel * model, int32_t num_samples, CStructuredLabels * labels_sgd, \ CFactorGraphLabels * fg_labels, float64_t & ave_error) { float64_t acc_loss_sgd = 0.0; for (int32_t i = 0; i < num_samples; ++i) { CStructuredData * y_pred = labels_sgd->get_label(i); CStructuredData * y_truth = fg_labels->get_label(i); acc_loss_sgd += model->delta_loss(y_truth, y_pred); SG_UNREF(y_pred); SG_UNREF(y_truth); } ave_error = acc_loss_sgd / static_cast<float64_t>(num_samples); } void test(MultilabelParameter param, SGMatrix<int32_t> labels_train, SGMatrix<float64_t> feats_train, SGMatrix<int32_t> labels_test, SGMatrix<float64_t> feats_test) { int32_t num_sample_train = labels_train.num_cols; int32_t num_classes = labels_train.num_rows; int32_t dim = feats_train.num_rows; // Build factor graph SGMatrix< int32_t > mat_edges = get_edge_list(param.graph_type, num_classes); int32_t
num_edges = mat_edges.num_rows; int32_t tid; // we have l = num_classes different weights: w_1, w_2, ..., w_l // so we create num_classes different unary factor types DynArray<CTableFactorType *> v_ftp_u; for (int32_t u = 0; u < num_classes; u++) { tid = u; SGVector<int32_t> card_u(1); card_u[0] = NUM_STATUS; SGVector<float64_t> w_u(dim * NUM_STATUS); w_u.zero(); v_ftp_u.append_element(new CTableFactorType(tid, card_u, w_u)); } // define factor type: tree edge factor // note that each edge is a new type DynArray<CTableFactorType *> v_ftp_t; for (int32_t t = 0; t < num_edges; t++) { tid = t + num_classes; SGVector<int32_t> card_t(2); card_t[0] = NUM_STATUS; card_t[1] = NUM_STATUS; SGVector<float64_t> w_t(NUM_STATUS * NUM_STATUS); w_t.zero(); v_ftp_t.append_element(new CTableFactorType(tid, card_t, w_t)); } // prepare features and labels in factor graph CFactorGraphFeatures * fg_feats_train = new CFactorGraphFeatures(num_sample_train); SG_REF(fg_feats_train); CFactorGraphLabels * fg_labels_train = new CFactorGraphLabels(num_sample_train); SG_REF(fg_labels_train); build_factor_graph(param, feats_train, labels_train, fg_feats_train, fg_labels_train, v_ftp_u, v_ftp_t); SG_SPRINT("----------------------------------------------------\n"); CFactorGraphModel * model = new CFactorGraphModel(fg_feats_train, fg_labels_train, param.infer_type, false); SG_REF(model); // initialize model parameters for (int32_t u = 0; u < num_classes; u++) model->add_factor_type(v_ftp_u[u]); for (int32_t t = 0; t < num_edges; t++) model->add_factor_type(v_ftp_t[t]); // create SGD solver CStochasticSOSVM * sgd = new CStochasticSOSVM(model, fg_labels_train, true); sgd->set_num_iter(param.sgd_num_iter); sgd->set_lambda(param.sgd_lambda); SG_REF(sgd); // timer CTime start; // train SGD sgd->train(); float64_t t2 = start.cur_time_diff(false); SG_SPRINT("SGD trained in %9.4f\n", t2); // Evaluation SGD CStructuredLabels * labels_sgd = CLabelsFactory::to_structured(sgd->apply()); SG_REF(labels_sgd); float64_t ave_loss_sgd = 0.0; evaluate(model, num_sample_train, labels_sgd, fg_labels_train, ave_loss_sgd); SG_SPRINT("sgd solver: average training loss = %f\n", ave_loss_sgd); SG_UNREF(labels_sgd); if(labels_test.num_cols > 0) { // prepare features and labels in factor graph int32_t num_sample_test = labels_test.num_cols; CFactorGraphFeatures * fg_feats_test = new CFactorGraphFeatures(num_sample_test); SG_REF(fg_feats_test); CFactorGraphLabels * fg_labels_test = new CFactorGraphLabels(num_sample_test); SG_REF(fg_labels_test); build_factor_graph(param, feats_test, labels_test, fg_feats_test, fg_labels_test, v_ftp_u, v_ftp_t); sgd->set_features(fg_feats_test); sgd->set_labels(fg_labels_test); labels_sgd = CLabelsFactory::to_structured(sgd->apply()); evaluate(model, num_sample_test, labels_sgd, fg_labels_test, ave_loss_sgd); SG_REF(labels_sgd); SG_SPRINT("sgd solver: average testing error = %f\n", ave_loss_sgd); SG_UNREF(fg_feats_test); SG_UNREF(fg_labels_test); } SG_UNREF(labels_sgd); SG_UNREF(sgd); SG_UNREF(model); SG_UNREF(fg_feats_train); SG_UNREF(fg_labels_train); } int main(int argc, char * argv[]) { init_shogun_with_defaults(); // Training data SGMatrix<int32_t> labels_train; SGMatrix<float64_t> feats_train; // Testing data SGMatrix<int32_t> labels_test; SGMatrix<float64_t> feats_test; // Train and test with real data FILE * pfile = fopen(FNAME_TRAIN, "r"); if (pfile == NULL) { SG_SPRINT("Unable to open file: %s\n", FNAME_TRAIN); return 0; } fclose(pfile); pfile = fopen(FNAME_TEST, "r"); if (pfile == NULL) { SG_SPRINT("Unable to 
open file: %s\n", FNAME_TEST); return 0; } fclose(pfile); SG_SPRINT("Experiment with real dataset: \n"); read_data(FNAME_TRAIN, labels_train, feats_train); read_data(FNAME_TEST, labels_test, feats_test); MultilabelParameter param; SG_SPRINT("\nExample 1: tree structure, max-product inference\n"); param = MultilabelParameter(TREE, TREE_MAX_PROD); test(param, labels_train, feats_train, labels_test, feats_test); SG_SPRINT("\nExample 2.1: tree structure, graph-cuts inference\n"); param = MultilabelParameter(TREE, GRAPH_CUT); test(param, labels_train, feats_train, labels_test, feats_test); SG_SPRINT("\nExample 2.2: fully-connected graph, graph-cuts inference\n"); param = MultilabelParameter(FULL, GRAPH_CUT); test(param, labels_train, feats_train, labels_test, feats_test); SG_SPRINT("\nExample 3.1: tree structure, GEMPLP inference\n"); param = MultilabelParameter(TREE, GEMPLP); test(param, labels_train, feats_train, labels_test, feats_test); SG_SPRINT("\nExample 3.2: fully-connected graph, GEMPLP inference\n"); param = MultilabelParameter(FULL, GEMPLP); test(param, labels_train, feats_train, labels_test, feats_test); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char * argv[]) { return 0; } #endif //USE_GPL_SHOGUN
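The flat indexing in get_edges_full above (mat[k] and mat[num_rows + k]) relies on SGMatrix storing its entries column-major. A minimal equivalent sketch using explicit (row, column) indexing, assuming the same num_rows-by-2 edge matrix:

// Column-major storage: mat(k, 0) == mat[k] and mat(k, 1) == mat[num_rows + k]
int32_t k = 0;
for (int32_t i = 0; i < num_classes - 1; i++)
{
	for (int32_t j = i + 1; j < num_classes; j++)
	{
		mat(k, 0) = i; // first endpoint of edge k
		mat(k, 1) = j; // second endpoint of edge k
		k++;
	}
}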
#include <shogun/features/MatrixFeatures.h> #include <shogun/loss/HingeLoss.h> #include <shogun/structure/SequenceLabels.h> #include <shogun/structure/HMSVMModel.h> #include <shogun/structure/PrimalMosekSOSVM.h> using namespace shogun; int main(int argc, char ** argv) { init_shogun_with_defaults(); #ifdef USE_MOSEK // Create structured labels CSequenceLabels* labels = new CSequenceLabels(5, 2); // Label sequences with two states int32_t lab1[] = {0, 0, 1, 1}; int32_t lab2[] = {1, 1, 1, 0}; int32_t lab3[] = {0, 1, 0, 1}; int32_t lab4[] = {1, 0, 0, 0}; int32_t lab5[] = {0, 1, 1, 0}; // No need for ref_counting in SGVector since the data is allocated // on the stack labels->add_vector_label(SGVector< int32_t >(lab1, 4, false)); labels->add_vector_label(SGVector< int32_t >(lab2, 4, false)); labels->add_vector_label(SGVector< int32_t >(lab3, 4, false)); labels->add_vector_label(SGVector< int32_t >(lab4, 4, false)); labels->add_vector_label(SGVector< int32_t >(lab5, 4, false)); // Create features CMatrixFeatures< float64_t >* features = new CMatrixFeatures< float64_t >(5, 3); // Observation matrices with three states float64_t mat1[] = { 0., 1., 2., 1., 1., 1., 2., 2., 2., 1., 0., 1. }; float64_t mat2[] = { 1., 2., 2., 0., 2., 1., 1., 1., 0., 0., 2., 1. }; float64_t mat3[] = { 0., 1., 2., 1., 1., 2., 1., 1., 0., 0., 1., 0. }; float64_t mat4[] = { 1., 2., 1., 0., 2., 1., 0., 2., 0., 1., 0., 2. }; float64_t mat5[] = { 2., 2., 0., 1., 2., 1., 0., 1., 2., 0., 2., 0. }; features->set_feature_vector(SGMatrix< float64_t >(mat1, 3, 4, false), 0); features->set_feature_vector(SGMatrix< float64_t >(mat2, 3, 4, false), 1); features->set_feature_vector(SGMatrix< float64_t >(mat3, 3, 4, false), 2); features->set_feature_vector(SGMatrix< float64_t >(mat4, 3, 4, false), 3); features->set_feature_vector(SGMatrix< float64_t >(mat5, 3, 4, false), 4); CHMSVMModel* model = new CHMSVMModel(features, labels, SMT_TWO_STATE, 3); SG_REF(model); CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels); SG_REF(sosvm); sosvm->train(); sosvm->get_w().display_vector("w"); sosvm->get_slacks().display_vector("slacks"); // Free memory SG_UNREF(sosvm); SG_UNREF(model); #endif /* USE_MOSEK */ exit_shogun(); return 0; }
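The example above wraps stack-allocated arrays in SGVector/SGMatrix with reference counting disabled. A minimal sketch of the pattern in isolation (array contents made up for illustration):

int32_t lab[] = {0, 1, 1, 0};
// false: do not ref-count; the wrapper must not outlive the wrapped array
SGVector<int32_t> vec(lab, 4, false);

float64_t mat[] = { 0., 1., 2., 1., 1., 1. }; // column-major: 3 rows, 2 columns
SGMatrix<float64_t> m(mat, 3, 2, false);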
#include <shogun/base/init.h> #include <shogun/classifier/svm/LibLinear.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/evaluation/StructuredAccuracy.h> #include <shogun/features/DenseFeatures.h> #include <shogun/io/SGIO.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/labels/StructuredLabels.h> #include <shogun/lib/common.h> #include <shogun/loss/HingeLoss.h> #include <shogun/machine/LinearMulticlassMachine.h> #include <shogun/mathematics/Math.h> #include <shogun/multiclass/MulticlassOneVsRestStrategy.h> #include <shogun/structure/MulticlassSOLabels.h> #include <shogun/structure/MulticlassModel.h> #include <shogun/structure/PrimalMosekSOSVM.h> #include <shogun/structure/DualLibQPBMSOSVM.h> #include <shogun/structure/StochasticSOSVM.h> #include <shogun/lib/Time.h> #include <shogun/base/init.h> #include <stdio.h> using namespace shogun; #define DIMS 2 #define EPSILON 10e-5 #define NUM_SAMPLES 100 #define NUM_CLASSES 10 char FNAME[] = "data.out"; void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats) { float64_t means[DIMS]; float64_t stds[DIMS]; FILE* pfile = fopen(FNAME, "w"); for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c ) { for ( int32_t j = 0 ; j < DIMS ; ++j ) { means[j] = CMath::random(-100, 100); stds[j] = CMath::random( 1, 5); } for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i ) { labs[c*NUM_SAMPLES+i] = c; fprintf(pfile, "%d", c); for ( int32_t j = 0 ; j < DIMS ; ++j ) { feats[(c*NUM_SAMPLES+i)*DIMS + j] = CMath::normal_random(means[j], stds[j]); fprintf(pfile, " %f", feats[(c*NUM_SAMPLES+i)*DIMS + j]); } fprintf(pfile, "\n"); } } fclose(pfile); } void read_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats) { FILE* pfile = fopen(FNAME, "r"); if (pfile == NULL) SG_SERROR("Unable to open file: %s\n", FNAME); int32_t label, idx; float32_t value; for ( int32_t i = 0 ; i < NUM_SAMPLES*NUM_CLASSES ; ++i ) { fscanf(pfile, "%d", &label); labs[i] = label; for ( int32_t j = 0 ; j < DIMS ; ++j ) { fscanf(pfile, "%d:%f", &idx, &value); feats[i*DIMS + j] = value; } } fclose(pfile); } int main(int argc, char ** argv) { init_shogun_with_defaults(); SGVector< float64_t > labs(NUM_CLASSES*NUM_SAMPLES); SGMatrix< float64_t > feats(DIMS, NUM_CLASSES*NUM_SAMPLES); gen_rand_data(labs, feats); //read_data(labs, feats); // Create train labels CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs); CMulticlassLabels* mlabels = new CMulticlassLabels(labs); // Create train features CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feats); // Create structured model CMulticlassModel* model = new CMulticlassModel(features, labels); // Create SO-SVM CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels); CDualLibQPBMSOSVM* bundle = new CDualLibQPBMSOSVM(model, labels, 100); CStochasticSOSVM* sgd = new CStochasticSOSVM(model, labels); bundle->set_verbose(false); SG_REF(sosvm); SG_REF(bundle); SG_REF(sgd); CTime start; sosvm->train(); float64_t t1 = start.cur_time_diff(false); bundle->train(); float64_t t2 = start.cur_time_diff(false); sgd->train(); float64_t t3 = start.cur_time_diff(false); SG_SPRINT(">>>> PrimalMosekSOSVM trained in %9.4f\n", t1); SG_SPRINT(">>>> BMRM trained in %9.4f\n", t2-t1); SG_SPRINT(">>>> SGD trained in %9.4f\n", t3-t2); CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply()); CStructuredLabels* bout = CLabelsFactory::to_structured(bundle->apply()); CStructuredLabels* sout = CLabelsFactory::to_structured(sgd->apply()); // Create liblinear svm classifier with L2-regularized 
L2-loss CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC); // Add some configuration to the svm svm->set_epsilon(EPSILON); svm->set_bias_enabled(false); // Create a multiclass svm classifier that consists of several of the previous ones CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine( new CMulticlassOneVsRestStrategy(), (CDotFeatures*) features, svm, mlabels); SG_REF(mc_svm); // Train the multiclass machine using the data passed in the constructor mc_svm->train(); CMulticlassLabels* mout = CLabelsFactory::to_multiclass(mc_svm->apply()); SGVector< float64_t > w = sosvm->get_w(); for ( int32_t i = 0 ; i < w.vlen ; ++i ) SG_SPRINT("%10f ", w[i]); SG_SPRINT("\n\n"); for ( int32_t i = 0 ; i < NUM_CLASSES ; ++i ) { CLinearMachine* lm = (CLinearMachine*) mc_svm->get_machine(i); SGVector< float64_t > mw = lm->get_w(); for ( int32_t j = 0 ; j < mw.vlen ; ++j ) SG_SPRINT("%10f ", mw[j]); SG_UNREF(lm); // because of CLinearMulticlassMachine::get_machine() } SG_SPRINT("\n"); CStructuredAccuracy* structured_evaluator = new CStructuredAccuracy(); CMulticlassAccuracy* multiclass_evaluator = new CMulticlassAccuracy(); SG_REF(structured_evaluator); SG_REF(multiclass_evaluator); SG_SPRINT("SO-SVM: %5.2f%%\n", 100.0*structured_evaluator->evaluate(out, labels)); SG_SPRINT("BMRM: %5.2f%%\n", 100.0*structured_evaluator->evaluate(bout, labels)); SG_SPRINT("SGD: %5.2f%%\n", 100.0*structured_evaluator->evaluate(sout, labels)); SG_SPRINT("MC: %5.2f%%\n", 100.0*multiclass_evaluator->evaluate(mout, mlabels)); // Free memory SG_UNREF(multiclass_evaluator); SG_UNREF(structured_evaluator); SG_UNREF(mout); SG_UNREF(mc_svm); SG_UNREF(sgd); SG_UNREF(bundle); SG_UNREF(sosvm); SG_UNREF(sout); SG_UNREF(bout); SG_UNREF(out); exit_shogun(); return 0; }
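Note the reference-counting pattern around CLinearMulticlassMachine::get_machine() in the example above: the returned per-class machine carries a reference taken for the caller, which must be released. A minimal sketch of the access pattern in isolation:

// get_machine(i) returns a referenced CMachine*; release it when done
CLinearMachine* lm = (CLinearMachine*) mc_svm->get_machine(0);
lm->get_w().display_vector("w_class0");
SG_UNREF(lm); // balances the reference taken by get_machine()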
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Michal Uricar * Copyright (C) 2012 Michal Uricar */ #include <shogun/lib/config.h> #ifdef USE_GPL_SHOGUN #include <shogun/base/init.h> #include <shogun/classifier/svm/LibLinear.h> #include <shogun/features/DenseFeatures.h> #include <shogun/io/SGIO.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/labels/StructuredLabels.h> #include <shogun/lib/common.h> #include <shogun/machine/LinearMulticlassMachine.h> #include <shogun/mathematics/Math.h> #include <shogun/multiclass/MulticlassOneVsRestStrategy.h> #include <shogun/structure/MulticlassSOLabels.h> #include <shogun/structure/BmrmStatistics.h> #include <shogun/structure/MulticlassModel.h> #include <shogun/structure/DualLibQPBMSOSVM.h> #include <shogun/io/streaming/StreamingAsciiFile.h> #include <shogun/features/streaming/StreamingSparseFeatures.h> using namespace shogun; #define DIMS 2 #define EPSILON 10e-5 #define NUM_SAMPLES 30 #define NUM_CLASSES 3 char FNAME[] = "data.svmlight"; /** Reads multiclass training data stored in svmlight format (i.e. label nz_idx_1:value1 nz_idx_2:value2 ... nz_idx_N:valueN ) * * @param fname path to file with training data * @param DIM dimension of features * @param N number of feature vectors * @param labs vector with labels * @param feats matrix with features */ void read_data(const char fname[], uint32_t DIM, uint32_t N, SGVector<float64_t> labs, SGMatrix<float64_t> feats) { CStreamingAsciiFile* file=new CStreamingAsciiFile(fname); SG_REF(file); CStreamingSparseFeatures< float64_t >* stream_features= new CStreamingSparseFeatures< float64_t >(file, true, 1024); SG_REF(stream_features); SGVector<float64_t > vec(DIM); stream_features->start_parser(); uint32_t num_vectors=0; while (stream_features->get_next_example()) { vec.zero(); stream_features->add_to_dense_vec(1.0, vec, DIM); labs[num_vectors]=stream_features->get_label(); for (uint32_t i=0; i<DIM; ++i) feats[num_vectors*DIM+i]=vec[i]; num_vectors++; stream_features->release_example(); } stream_features->end_parser(); SG_UNREF(stream_features); } /** Generates random multiclass training data and stores them in svmlight format * * @param labs returned vector with labels * @param feats returned matrix with features */ void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats) { float64_t means[DIMS]; float64_t stds[DIMS]; FILE* pfile = fopen(FNAME, "w"); CMath::init_random(17); for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c ) { for ( int32_t j = 0 ; j < DIMS ; ++j ) { means[j] = CMath::random(-100, 100); stds[j] = CMath::random( 1, 5); } for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i ) { labs[c*NUM_SAMPLES+i] = c; fprintf(pfile, "%d", c); for ( int32_t j = 0 ; j < DIMS ; ++j ) { feats[(c*NUM_SAMPLES+i)*DIMS + j] = CMath::normal_random(means[j], stds[j]); fprintf(pfile, " %d:%f", j+1, feats[(c*NUM_SAMPLES+i)*DIMS + j]); } fprintf(pfile, "\n"); } } fclose(pfile); } int main(int argc, char * argv[]) { // initialization //------------------------------------------------------------------------- float64_t lambda=0.01, eps=0.01; bool icp=true; uint32_t cp_models=1; ESolver solver=BMRM; uint32_t feat_dim, num_feat; init_shogun_with_defaults(); if (argc > 1 && argc < 8) { SG_SERROR("Usage: so_multiclass_BMRM <data.in> <feat_dim> <num_feat> <lambda> <icp> <epsilon> <solver>
[<cp_models>]\n"); return -1; } if (argc > 1) { // parse command line arguments for parameters setting SG_SPRINT("arg[1] = %s\n", argv[1]); feat_dim=::atoi(argv[2]); num_feat=::atoi(argv[3]); lambda=::atof(argv[4]); icp=::atoi(argv[5]); eps=::atof(argv[6]); if (strcmp("BMRM", argv[7])==0) solver=BMRM; if (strcmp("PPBMRM", argv[7])==0) solver=PPBMRM; if (strcmp("P3BMRM", argv[7])==0) solver=P3BMRM; if (argc > 8) { cp_models=::atoi(argv[8]); } } else { // default parameters feat_dim=DIMS; num_feat=NUM_SAMPLES*NUM_CLASSES; lambda=1e3; icp=1; eps=0.01; solver=BMRM; } SGVector<float64_t> labs(num_feat); SGMatrix<float64_t> feats(feat_dim, num_feat); if (argc==1) { gen_rand_data(labs, feats); } else { // read data read_data(argv[1], feat_dim, num_feat, labs, feats); } // Create train labels CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs); // Create train features CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feats); // Create structured model CMulticlassModel* model = new CMulticlassModel(features, labels); // Create SO-SVM CDualLibQPBMSOSVM* sosvm = new CDualLibQPBMSOSVM( model, labels, lambda); SG_REF(sosvm); sosvm->set_cleanAfter(10); sosvm->set_cleanICP(icp); sosvm->set_TolRel(eps); sosvm->set_cp_models(cp_models); sosvm->set_solver(solver); // Train //------------------------------------------------------------------------- SG_SPRINT("Train using lambda = %lf ICP removal = %d \n", sosvm->get_lambda(), sosvm->get_cleanICP()); sosvm->train(); BmrmStatistics res = sosvm->get_result(); SG_SPRINT("result = { Fp=%lf, Fd=%lf, nIter=%d, nCP=%d, nzA=%d, exitflag=%d }\n", res.Fp, res.Fd, res.nIter, res.nCP, res.nzA, res.exitflag); CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply()); SG_REF(out); SG_SPRINT("\n"); // Compute error //------------------------------------------------------------------------- float64_t error=0.0; for (uint32_t i=0; i<num_feat; ++i) { CRealNumber* rn = CRealNumber::obtain_from_generic( out->get_label(i) ); error+=(rn->value==labs.get_element(i)) ? 0.0 : 1.0; SG_UNREF(rn); // because of out->get_label(i) above } SG_SPRINT("Error = %lf %% \n", error/num_feat*100); // Free memory SG_UNREF(sosvm); SG_UNREF(out); exit_shogun(); return 0; } #else //USE_GPL_SHOGUN int main(int argc, char * argv[]) { return 0; } #endif //USE_GPL_SHOGUN
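Given the usage string above, a hypothetical invocation (all argument values are made up for illustration: dataset path, feature dimension, number of vectors, lambda, ICP flag, epsilon, solver) could look like:

./so_multiclass_BMRM data.svmlight 2 90 0.01 1 0.01 BMRM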
/* * This software is distributed under BSD 3-clause license (see LICENSE file). * * Copyright(C) 2014 Abinash Panda * Written(W) 2014 Abinash Panda */ #include <shogun/base/init.h> #include <shogun/evaluation/StructuredAccuracy.h> #include <shogun/features/SparseFeatures.h> #include <shogun/io/LibSVMFile.h> #include <shogun/io/SGIO.h> #include <shogun/lib/SGSparseVector.h> #include <shogun/structure/MultilabelModel.h> #include <shogun/structure/MultilabelSOLabels.h> #include <shogun/structure/StochasticSOSVM.h> #include <shogun/structure/DualLibQPBMSOSVM.h> #include <shogun/structure/PrimalMosekSOSVM.h> #include <shogun/lib/Time.h> #include <shogun/mathematics/Math.h> using namespace shogun; void load_data(const char * file_name, SGMatrix<float64_t> &feats_matrix, int32_t &dim_feat, int32_t &num_samples, SGVector<int32_t> * &multilabels, int32_t &num_classes) { CLibSVMFile * file = new CLibSVMFile(file_name); ASSERT(file != NULL); SG_REF(file); SGSparseVector<float64_t> * feats; SGVector<float64_t> * labels; file->get_sparse_matrix( feats, dim_feat, num_samples, labels, num_classes); feats_matrix = SGMatrix<float64_t>(dim_feat, num_samples); /** preparation of data for multilabel model */ for (index_t i = 0; i < num_samples; i++) { SGSparseVector<float64_t> feat_sample = feats[i]; for (index_t j = 0; j < dim_feat; j++) feats_matrix[i * dim_feat + j] = feat_sample.get_feature(j); } multilabels = SG_MALLOC(SGVector<int32_t>, num_samples); for (index_t i = 0; i < num_samples; i++) { SGVector<float64_t> label_sample = labels[i]; SGVector<int32_t> multilabel_sample(label_sample.vlen); for (index_t j = 0; j < label_sample.vlen; j++) multilabel_sample[j] = label_sample[j]; CMath::qsort(multilabel_sample); multilabels[i] = multilabel_sample; } SG_UNREF(file); SG_FREE(feats); SG_FREE(labels); } int main(int argc, char ** argv) { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); const char train_file_name[] = "../../../../data/multilabel/yeast_train.svm"; const char test_file_name[] = "../../../../data/multilabel/yeast_test.svm"; SGMatrix<float64_t> feats_matrix; SGVector<int32_t> * multilabels; int32_t dim_feat; int32_t num_samples; int32_t num_classes; load_data( train_file_name, feats_matrix, dim_feat, num_samples, multilabels, num_classes); SG_SPRINT("Number of samples = %d\n", num_samples); SG_SPRINT("Dimension of feature = %d\n", dim_feat); SG_SPRINT("Number of classes = %d\n", num_classes); SG_SPRINT("-------------------------------------------\n"); CMultilabelSOLabels * mlabels = new CMultilabelSOLabels(num_samples, num_classes); SG_REF(mlabels); mlabels->set_sparse_labels(multilabels); CSparseFeatures<float64_t> * features = new CSparseFeatures<float64_t>( feats_matrix); SG_REF(features); CMultilabelModel * model = new CMultilabelModel(features, mlabels); SG_REF(model); CStochasticSOSVM * sgd = new CStochasticSOSVM(model, mlabels); SG_REF(sgd); CDualLibQPBMSOSVM * bundle = new CDualLibQPBMSOSVM(model, mlabels, 100); bundle->set_verbose(false); SG_REF(bundle); CPrimalMosekSOSVM * sosvm = new CPrimalMosekSOSVM(model, mlabels); SG_REF(sosvm); CTime * start = new CTime(); SG_REF(start); sgd->train(); float64_t t1 = start->cur_time_diff(false); bundle->train(); float64_t t2 = start->cur_time_diff(false); sosvm->train(); float64_t t3 = start->cur_time_diff(false); SG_SPRINT(">>> Time taken for training using %s = %f\n", sgd->get_name(), t1); SG_SPRINT(">>> Time taken for training using %s = %f\n", bundle->get_name(), t2 - t1); SG_SPRINT(">>> Time taken for learning using %s = %f\n", 
sosvm->get_name(), t3 - t2); SGMatrix<float64_t> test_feats_matrix; SGVector<int32_t> * test_multilabels; load_data( test_file_name, test_feats_matrix, dim_feat, num_samples, test_multilabels, num_classes); CSparseFeatures<float64_t> * test_features = new CSparseFeatures<float64_t>( test_feats_matrix); SG_REF(test_features); CMultilabelSOLabels * test_labels = new CMultilabelSOLabels(num_samples, num_classes); SG_REF(test_labels); test_labels->set_sparse_labels(test_multilabels); CStructuredLabels * out = CLabelsFactory::to_structured( sgd->apply(test_features)); CStructuredLabels * bout = CLabelsFactory::to_structured( bundle->apply(test_features)); CStructuredLabels * sout = CLabelsFactory::to_structured( sosvm->apply(test_features)); CStructuredAccuracy * evaluator = new CStructuredAccuracy(); SG_REF(evaluator); SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n", sgd->get_name(), evaluator->evaluate(out, test_labels)); SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n", bundle->get_name(), evaluator->evaluate(bout, test_labels)); SG_SPRINT(">>> Accuracy of multilabel classification using %s = %f\n", sosvm->get_name(), evaluator->evaluate(sout, test_labels)); SG_UNREF(bout); SG_UNREF(bundle); SG_UNREF(evaluator); SG_UNREF(features); SG_UNREF(mlabels); SG_UNREF(model); SG_UNREF(out); SG_UNREF(sgd); SG_UNREF(sosvm); SG_UNREF(sout); SG_UNREF(start); SG_UNREF(test_features); SG_UNREF(test_labels); SG_FREE(multilabels); SG_FREE(test_multilabels); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Saurabh Mahindre */ #include <shogun/base/init.h> #include <shogun/evaluation/LOOCrossValidationSplitting.h> #include <shogun/labels/RegressionLabels.h> using namespace shogun; int main(int argc, char **argv) { init_shogun_with_defaults(); index_t num_labels; index_t runs=10; while (runs-->0) { num_labels=CMath::random(10, 50); //SG_SPRINT("num_labels=%d\n\n", num_labels); /* build labels */ CRegressionLabels* labels=new CRegressionLabels(num_labels); for (index_t i=0; i<num_labels; ++i) { labels->set_label(i, CMath::random(-10.0, 10.0)); // SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i)); } //SG_SPRINT("\n"); /* build Leave one out splitting strategy */ CLOOCrossValidationSplitting* splitting= new CLOOCrossValidationSplitting(labels); splitting->build_subsets(); for (index_t i=0; i<num_labels; ++i) { //SG_SPRINT("subset %d\n", i); SGVector<index_t> subset=splitting->generate_subset_indices(i); SGVector<index_t> inverse=splitting->generate_subset_inverse(i); SGVector<index_t>::display_vector(subset.vector, subset.vlen, "subset indices"); SGVector<index_t>::display_vector(inverse.vector, inverse.vlen, "inverse indices"); /*for (index_t j=0; j<subset.vlen; ++j) SG_SPRINT("%d:(%f),", subset.vector[j], labels->get_label(subset.vector[j])); SG_SPRINT("\n"); SG_SPRINT("inverse %d\n", i); for (index_t j=0; j<inverse.vlen; ++j) SG_SPRINT("%d(%d),", inverse.vector[j], (int32_t)labels->get_label(inverse.vector[j])); SG_SPRINT("\n\n"); */ } /* clean up */ SG_UNREF(splitting); } exit_shogun(); return 0; }
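Leave-one-out splitting is K-fold cross-validation splitting with K equal to the number of labels, so every generated subset holds exactly one index and its inverse holds all the others. A minimal sketch of that invariant, reusing only the API from the example above:

CLOOCrossValidationSplitting* loo = new CLOOCrossValidationSplitting(labels);
loo->build_subsets();
SGVector<index_t> subset = loo->generate_subset_indices(0);
SGVector<index_t> inverse = loo->generate_subset_inverse(0);
ASSERT(subset.vlen == 1); // one held-out element per fold
ASSERT(subset.vlen + inverse.vlen == labels->get_num_labels());
SG_UNREF(loo);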
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/evaluation/CrossValidationSplitting.h> #include <shogun/labels/RegressionLabels.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); index_t num_labels; index_t num_subsets; index_t runs=100; while (runs-->0) { num_labels=CMath::random(10, 150); num_subsets=CMath::random(1, 5); index_t desired_size=CMath::round( (float64_t)num_labels/(float64_t)num_subsets); /* this will throw an error */ if (num_labels<num_subsets) continue; SG_SPRINT("num_labels=%d\nnum_subsets=%d\n\n", num_labels, num_subsets); /* build labels */ CRegressionLabels* labels=new CRegressionLabels(num_labels); for (index_t i=0; i<num_labels; ++i) { labels->set_label(i, CMath::random(-10.0, 10.0)); SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i)); } SG_SPRINT("\n"); /* build splitting strategy */ CCrossValidationSplitting* splitting= new CCrossValidationSplitting(labels, num_subsets); /* build index sets (twice to ensure memory is not leaking) */ splitting->build_subsets(); splitting->build_subsets(); for (index_t i=0; i<num_subsets; ++i) { SG_SPRINT("subset %d\n", i); SGVector<index_t> subset=splitting->generate_subset_indices(i); SGVector<index_t> inverse=splitting->generate_subset_inverse(i); SGVector<index_t>::display_vector(subset.vector, subset.vlen, "subset indices"); SGVector<index_t>::display_vector(inverse.vector, inverse.vlen, "inverse indices"); SG_SPRINT("checking subset size: %d vs subset desired size %d\n", subset.vlen, desired_size); ASSERT(CMath::abs(subset.vlen-desired_size)<=1); ASSERT(subset.vlen+inverse.vlen==num_labels); for (index_t j=0; j<subset.vlen; ++j) SG_SPRINT("%d:(%f),", subset.vector[j], labels->get_label(subset.vector[j])); SG_SPRINT("\n"); SG_SPRINT("inverse %d\n", i); for (index_t j=0; j<inverse.vlen; ++j) SG_SPRINT("%d(%d),", inverse.vector[j], (int32_t)labels->get_label(inverse.vector[j])); SG_SPRINT("\n\n"); } /* clean up */ SG_UNREF(splitting); } exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/evaluation/StratifiedCrossValidationSplitting.h> #include <shogun/labels/MulticlassLabels.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); index_t num_labels, num_classes, num_subsets; index_t runs=50; while (runs-->0) { num_labels=CMath::random(5, 100); num_classes=CMath::random(2, 10); num_subsets=CMath::random(1, 10); /* this will throw an error */ if (num_labels<num_subsets) continue; SG_SPRINT("num_labels=%d\nnum_classes=%d\nnum_subsets=%d\n\n", num_labels, num_classes, num_subsets); /* build labels */ CMulticlassLabels* labels=new CMulticlassLabels(num_labels); for (index_t i=0; i<num_labels; ++i) { labels->set_label(i, CMath::random()%num_classes); SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i)); } SG_SPRINT("\n"); /* print classes */ SGVector<float64_t> classes=labels->get_unique_labels(); SGVector<float64_t>::display_vector(classes.vector, classes.vlen, "classes"); /* build splitting strategy */ CStratifiedCrossValidationSplitting* splitting= new CStratifiedCrossValidationSplitting(labels, num_subsets); /* build index sets (twice to ensure memory is not leaking) */ splitting->build_subsets(); splitting->build_subsets(); for (index_t i=0; i<num_subsets; ++i) { SGVector<index_t> subset=splitting->generate_subset_indices(i); SGVector<index_t> inverse=splitting->generate_subset_inverse(i); SG_SPRINT("subset %d\n", i); for (index_t j=0; j<subset.vlen; ++j) SG_SPRINT("%d(%d),", subset.vector[j], (int32_t)labels->get_label(subset.vector[j])); SG_SPRINT("\n"); SG_SPRINT("inverse %d\n", i); for (index_t j=0; j<inverse.vlen; ++j) SG_SPRINT("%d(%d),", inverse.vector[j], (int32_t)labels->get_label(inverse.vector[j])); SG_SPRINT("\n\n"); } /* check whether number of labels in every subset is nearly equal */ for (index_t i=0; i<num_classes; ++i) { SG_SPRINT("checking class %d\n", i); /* count number of elements for this class */ SGVector<index_t> temp=splitting->generate_subset_indices(0); int32_t count=0; for (index_t j=0; j<temp.vlen; ++j) { if ((int32_t)labels->get_label(temp.vector[j])==i) ++count; } /* check all subsets for same ratio */ for (index_t j=0; j<num_subsets; ++j) { SGVector<index_t> subset=splitting->generate_subset_indices(j); int32_t temp_count=0; for (index_t k=0; k<subset.vlen; ++k) { if ((int32_t)labels->get_label(subset.vector[k])==i) ++temp_count; } /* at most one difference */ SG_SPRINT("number in subset %d: %d\n", j, temp_count); ASSERT(CMath::abs(temp_count-count)<=1); } } /* clean up */ SG_UNREF(splitting); } exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/statistics/HSIC.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/mathematics/Statistics.h> using namespace shogun; void create_fixed_data_kernel_small(CFeatures*& features_p, CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q) { index_t m=2; index_t d=3; SGMatrix<float64_t> p(d,2*m); for (index_t i=0; i<2*d*m; ++i) p.matrix[i]=i; // p.display_matrix("p"); SGMatrix<float64_t> q(d,2*m); for (index_t i=0; i<2*d*m; ++i) q.matrix[i]=i+10; // q.display_matrix("q"); features_p=new CDenseFeatures<float64_t>(p); features_q=new CDenseFeatures<float64_t>(q); float64_t sigma_x=2; float64_t sigma_y=3; float64_t sq_sigma_x_twice=sigma_x*sigma_x*2; float64_t sq_sigma_y_twice=sigma_y*sigma_y*2; /* Shogun's kernel width is parametrised differently */ kernel_p=new CGaussianKernel(10, sq_sigma_x_twice); kernel_q=new CGaussianKernel(10, sq_sigma_y_twice); } void create_fixed_data_kernel_big(CFeatures*& features_p, CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q) { index_t m=10; index_t d=7; SGMatrix<float64_t> p(d,m); for (index_t i=0; i<d*m; ++i) p.matrix[i]=(i+8)%3; // p.display_matrix("p"); SGMatrix<float64_t> q(d,m); for (index_t i=0; i<d*m; ++i) q.matrix[i]=((i+10)*(i%4+2))%4; // q.display_matrix("q"); features_p=new CDenseFeatures<float64_t>(p); features_q=new CDenseFeatures<float64_t>(q); float64_t sigma_x=2; float64_t sigma_y=3; float64_t sq_sigma_x_twice=sigma_x*sigma_x*2; float64_t sq_sigma_y_twice=sigma_y*sigma_y*2; /* Shogun's kernel width is parametrised differently */ kernel_p=new CGaussianKernel(10, sq_sigma_x_twice); kernel_q=new CGaussianKernel(10, sq_sigma_y_twice); } /** tests the hsic statistic for a single fixed data case and ensures * equality with the MATLAB implementation */ void test_hsic_fixed() { CFeatures* features_p=NULL; CFeatures* features_q=NULL; CKernel* kernel_p=NULL; CKernel* kernel_q=NULL; create_fixed_data_kernel_small(features_p, features_q, kernel_p, kernel_q); index_t m=features_p->get_num_vectors(); CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q); /* assert matlab result, note that compute statistic computes m*hsic */ float64_t difference=hsic->compute_statistic(); SG_SPRINT("hsic fixed: %f\n", difference); ASSERT(CMath::abs(difference-m*0.164761446385339)<10E-16); SG_UNREF(hsic); } void test_hsic_gamma() { CFeatures* features_p=NULL; CFeatures* features_q=NULL; CKernel* kernel_p=NULL; CKernel* kernel_q=NULL; create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q); CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q); hsic->set_null_approximation_method(HSIC_GAMMA); float64_t p=hsic->compute_p_value(0.05); SG_SPRINT("p-value: %f\n", p); // disabled as I think previous inverse_gamma_cdf was faulty // now unit test fails.
Needs to be investigated statistically //ASSERT(CMath::abs(p-0.172182287884256)<10E-15); SG_UNREF(hsic); } void test_hsic_sample_null() { CFeatures* features_p=NULL; CFeatures* features_q=NULL; CKernel* kernel_p=NULL; CKernel* kernel_q=NULL; create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q); CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q); /* do sampling null */ hsic->set_null_approximation_method(PERMUTATION); float64_t p=hsic->compute_p_value(0.05); SG_SPRINT("p-value: %f\n", p); /* ensure that sampling null of hsic leads to same results as using * CKernelIndependenceTest */ CMath::init_random(1); float64_t mean1=CStatistics::mean(hsic->sample_null()); float64_t var1=CStatistics::variance(hsic->sample_null()); SG_SPRINT("mean1=%f, var1=%f\n", mean1, var1); CMath::init_random(1); float64_t mean2=CStatistics::mean( hsic->CKernelIndependenceTest::sample_null()); float64_t var2=CStatistics::variance( hsic->CKernelIndependenceTest::sample_null()); SG_SPRINT("mean2=%f, var2=%f\n", mean2, var2); /* assert that results are the same for both sampling null implementations */ ASSERT(CMath::abs(mean1-mean2)<10E-8); ASSERT(CMath::abs(var1-var2)<10E-8); SG_UNREF(hsic); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_hsic_fixed(); test_hsic_gamma(); test_hsic_sample_null(); exit_shogun(); return 0; }
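The "kernel width" comments above refer to Shogun's Gaussian kernel convention k(x,y) = exp(-||x-y||^2 / width), so a Gaussian with standard deviation sigma corresponds to width = 2*sigma^2, which is exactly what the sq_sigma_*_twice variables compute. A minimal sketch:

// Gaussian kernel with sigma = 2 in Shogun's width convention (cache size 10)
float64_t sigma = 2.0;
CGaussianKernel* kernel = new CGaussianKernel(10, 2*sigma*sigma); // width = 2*sigma^2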
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/statistics/LinearTimeMMD.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/streaming/generators/MeanShiftDataGenerator.h> #include <shogun/mathematics/Statistics.h> using namespace shogun; void linear_time_mmd() { /* note that the linear time statistic is designed for much larger datasets * so increase to get reasonable results */ index_t m=1000; index_t dim=2; float64_t difference=0.5; /* streaming data generator for mean shift distributions */ CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim); CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim); /* set kernel a-priori. usually one would do some kernel selection. See * other examples for this. */ float64_t width=10; CGaussianKernel* kernel=new CGaussianKernel(10, width); /* create linear time mmd instance */ index_t blocksize=1000; CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m, blocksize); /* perform test: compute p-value and test if null-hypothesis is rejected for * a test level of 0.05 */ float64_t alpha=0.05; /* using bootstrapping (not recommended for linear time MMD, since slow). * Also, in practice, use at least 250 iterations */ mmd->set_null_approximation_method(PERMUTATION); mmd->set_num_null_samples(10); float64_t p_value_bootstrap=mmd->perform_test(); /* reject if p-value is smaller than test level */ SG_SPRINT("bootstrap: p!=q: %d\n", p_value_bootstrap<alpha); /* using Gaussian approximation (use large sample size, check type I error). * Also, in practice, use at least 250 iterations */ mmd->set_null_approximation_method(MMD1_GAUSSIAN); float64_t p_value_gaussian=mmd->perform_test(); /* reject if p-value is smaller than test level */ SG_SPRINT("gaussian approx: p!=q: %d\n", p_value_gaussian<alpha); /* compute type I and II error (use many more trials in practice). * Type I error is only estimated to check MMD1_GAUSSIAN method for * estimating the null distribution. Note that testing has to happen on * different data than kernel selection, but the linear time mmd does this * implicitly and we used a fixed kernel here. */ index_t num_trials=5; SGVector<float64_t> typeIerrors(num_trials); SGVector<float64_t> typeIIerrors(num_trials); for (index_t i=0; i<num_trials; ++i) { /* this effectively means that p=q - rejecting is type I error */ mmd->set_simulate_h0(true); typeIerrors[i]=mmd->perform_test()>alpha; mmd->set_simulate_h0(false); typeIIerrors[i]=mmd->perform_test()>alpha; } SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors)); SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors)); SG_UNREF(mmd); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); linear_time_mmd(); exit_shogun(); return 0; }
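perform_test() in the example above returns a p-value under the null hypothesis p = q; the test rejects when that p-value falls below the chosen test level. A minimal sketch of the decision rule used throughout these two-sample test examples:

float64_t alpha = 0.05; // test level
float64_t p_value = mmd->perform_test(); // p-value under H0: p = q
bool reject_h0 = p_value < alpha; // true means the test concludes p != q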
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/statistics/LinearTimeMMD.h> #include <shogun/statistics/QuadraticTimeMMD.h> #ifdef USE_GPL_SHOGUN #include <shogun/statistics/MMDKernelSelectionCombOpt.h> #include <shogun/statistics/MMDKernelSelectionCombMaxL2.h> #endif //USE_GPL_SHOGUN #include <shogun/statistics/MMDKernelSelectionOpt.h> #include <shogun/statistics/MMDKernelSelectionMax.h> #include <shogun/statistics/MMDKernelSelectionMedian.h> #include <shogun/features/streaming/StreamingFeatures.h> #include <shogun/features/streaming/generators/GaussianBlobsDataGenerator.h> #include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CombinedKernel.h> #include <shogun/mathematics/Statistics.h> using namespace shogun; void kernel_choice_linear_time_mmd_opt_single() { /* Note that the linear time mmd is designed for large datasets. Results on * this small number will be bad (unstable, type I error wrong) */ index_t m=1000; index_t num_blobs=3; float64_t distance=3; float64_t stretch=10; float64_t angle=CMath::PI/4; CGaussianBlobsDataGenerator* gen_p=new CGaussianBlobsDataGenerator( num_blobs, distance, stretch, angle); CGaussianBlobsDataGenerator* gen_q=new CGaussianBlobsDataGenerator( num_blobs, distance, 1, 1); /* create kernels */ CCombinedKernel* combined=new CCombinedKernel(); float64_t sigma_from=-3; float64_t sigma_to=10; float64_t sigma_step=1; float64_t sigma=sigma_from; while (sigma<=sigma_to) { /* Shogun's kernel width is parametrised differently */ float64_t width=CMath::pow(2.0, sigma); float64_t sq_width_twice=width*width*2; combined->append_kernel(new CGaussianKernel(10, sq_width_twice)); sigma+=sigma_step; } /* create MMD instance */ CLinearTimeMMD* mmd=new CLinearTimeMMD(combined, gen_p, gen_q, m); /* kernel selection instance with regularisation term. May be replaced by * other methods for selecting single kernels */ CMMDKernelSelectionOpt* selection= new CMMDKernelSelectionOpt(mmd, 10E-5); // /* select kernel that maximised MMD */ // CMMDKernelSelectionMax* selection= // new CMMDKernelSelectionMax(mmd); // /* select kernel with width closest to median data distance */ // CMMDKernelSelectionMedian* selection= // new CMMDKernelSelectionMedian(mmd, 10E-5); /* compute measures. * For Opt: ratio of MMD and standard deviation * For Max: MMDs of single kernels * For Median: does not work! */ SG_SPRINT("computing ratios\n"); SGVector<float64_t> ratios=selection->compute_measures(); ratios.display_vector("ratios"); /* select kernel using the maximum ratio (and cast) */ SG_SPRINT("selecting kernel\n"); CKernel* selected=selection->select_kernel(); CGaussianKernel* casted=CGaussianKernel::obtain_from_generic(selected); SG_SPRINT("selected kernel width: %f\n", casted->get_width()); mmd->set_kernel(selected); SG_UNREF(casted); SG_UNREF(selected); mmd->set_null_approximation_method(MMD1_GAUSSIAN); /* compute type I and II error (use many more trials). Type I error is only * estimated to check MMD1_GAUSSIAN method for estimating the null * distribution.
Note that testing has to happen on different data than * kernel selection, but the linear time mmd does this implicitly */ float64_t alpha=0.05; index_t num_trials=5; SGVector<float64_t> typeIerrors(num_trials); SGVector<float64_t> typeIIerrors(num_trials); for (index_t i=0; i<num_trials; ++i) { /* this effectively means that p=q - rejecting is type I error */ mmd->set_simulate_h0(true); typeIerrors[i]=mmd->perform_test()>alpha; mmd->set_simulate_h0(false); typeIIerrors[i]=mmd->perform_test()>alpha; } SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors)); SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors)); SG_UNREF(selection); } void kernel_choice_linear_time_mmd_opt_comb() { #ifdef USE_GPL_SHOGUN /* Note that the linear time mmd is designed for large datasets. Results on * this small number will be bad (unstable, type I error wrong) */ index_t m=1000; index_t num_blobs=3; float64_t distance=3; float64_t stretch=10; float64_t angle=CMath::PI/4; CGaussianBlobsDataGenerator* gen_p=new CGaussianBlobsDataGenerator( num_blobs, distance, stretch, angle); CGaussianBlobsDataGenerator* gen_q=new CGaussianBlobsDataGenerator( num_blobs, distance, 1, 1); /* create kernels */ CCombinedKernel* combined=new CCombinedKernel(); float64_t sigma_from=-3; float64_t sigma_to=10; float64_t sigma_step=1; float64_t sigma=sigma_from; index_t num_kernels=0; while (sigma<=sigma_to) { /* Shogun's kernel width is parametrised differently */ float64_t width=CMath::pow(2.0, sigma); float64_t sq_width_twice=width*width*2; combined->append_kernel(new CGaussianKernel(10, sq_width_twice)); sigma+=sigma_step; num_kernels++; } /* create MMD instance */ CLinearTimeMMD* mmd=new CLinearTimeMMD(combined, gen_p, gen_q, m); /* kernel selection instance with regularisation term. May be replaced by * other methods for selecting single kernels */ CMMDKernelSelectionCombOpt* selection= new CMMDKernelSelectionCombOpt(mmd, 10E-5); /* maximise L2 regularised MMD */ // CMMDKernelSelectionCombMaxL2* selection= // new CMMDKernelSelectionCombMaxL2(mmd, 10E-5); /* select kernel (does the same as above, but sets weights to kernel) */ SG_SPRINT("selecting kernel\n"); CKernel* selected=selection->select_kernel(); CCombinedKernel* casted=CCombinedKernel::obtain_from_generic(selected); casted->get_subkernel_weights().display_vector("weights"); mmd->set_kernel(selected); SG_UNREF(casted); SG_UNREF(selected); /* compute type I and II error (use many more trials). Type I error is only * estimated to check MMD1_GAUSSIAN method for estimating the null * distribution.
Note that testing has to happen on different data than * kernel selection, but the linear time mmd does this implicitly */ mmd->set_null_approximation_method(MMD1_GAUSSIAN); float64_t alpha=0.05; index_t num_trials=5; SGVector<float64_t> typeIerrors(num_trials); SGVector<float64_t> typeIIerrors(num_trials); for (index_t i=0; i<num_trials; ++i) { /* this effectively means that p=q - rejecting is type I error */ mmd->set_simulate_h0(true); typeIerrors[i]=mmd->perform_test()>alpha; mmd->set_simulate_h0(false); typeIIerrors[i]=mmd->perform_test()>alpha; } SG_SPRINT("type I error: %f\n", CStatistics::mean(typeIerrors)); SG_SPRINT("type II error: %f\n", CStatistics::mean(typeIIerrors)); SG_UNREF(selection); #endif //USE_GPL_SHOGUN } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); /* select a single kernel for linear time MMD */ kernel_choice_linear_time_mmd_opt_single(); /* select combined kernels for linear time MMD */ kernel_choice_linear_time_mmd_opt_comb(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2013 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/statistics/QuadraticTimeMMD.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/streaming/generators/MeanShiftDataGenerator.h> #include <shogun/mathematics/Statistics.h> using namespace shogun; void quadratic_time_mmd() { /* number of examples kept low in order to make things fast */ index_t m=30; index_t dim=2; float64_t difference=0.5; /* streaming data generator for mean shift distributions */ CMeanShiftDataGenerator* gen_p=new CMeanShiftDataGenerator(0, dim); CMeanShiftDataGenerator* gen_q=new CMeanShiftDataGenerator(difference, dim); /* stream some data from generator */ CFeatures* feat_p=gen_p->get_streamed_features(m); CFeatures* feat_q=gen_q->get_streamed_features(m); /* set kernel a-priori. usually one would do some kernel selection. See * other examples for this. */ float64_t width=10; CGaussianKernel* kernel=new CGaussianKernel(10, width); /* create quadratic time mmd instance. Note that this constructor * copies p and q and does not reference them */ CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, feat_p, feat_q); /* perform test: compute p-value and test if null-hypothesis is rejected for * a test level of 0.05 */ float64_t alpha=0.05; /* using permutation (slow, not the most reliable way. Consider pre- * computing the kernel when using it, see below). * Also, in practice, use at least 250 iterations */ mmd->set_null_approximation_method(PERMUTATION); mmd->set_num_null_samples(3); float64_t p_value=mmd->perform_test(); /* reject if p-value is smaller than test level */ SG_SPRINT("bootstrap: p!=q: %d\n", p_value<alpha); /* using spectrum method. Use at least 250 samples from null. * This is consistent but sometimes breaks, always monitor type I error. * See tutorial for number of eigenvalues to use. * Only works with BIASED statistic */ mmd->set_statistic_type(BIASED); mmd->set_null_approximation_method(MMD2_SPECTRUM); mmd->set_num_eigenvalues_spectrum(3); mmd->set_num_samples_spectrum(250); p_value=mmd->perform_test(); /* reject if p-value is smaller than test level */ SG_SPRINT("spectrum: p!=q: %d\n", p_value<alpha); /* using gamma method. This is a quick hack, which works most of the time * but is NOT guaranteed to. See tutorial for details. * Only works with BIASED statistic */ mmd->set_statistic_type(BIASED); mmd->set_null_approximation_method(MMD2_GAMMA); p_value=mmd->perform_test(); /* reject if p-value is smaller than test level */ SG_SPRINT("gamma: p!=q: %d\n", p_value<alpha); /* compute type I and II error (use many more trials in practice). * Type I error is not necessary if one uses permutation. We do it here * anyway, but note that this is an efficient way of computing it. * Also note that testing has to happen on * different data than kernel selection, but the linear time mmd does this * implicitly and we used a fixed kernel here.
*/ mmd->set_null_approximation_method(PERMUTATION); mmd->set_num_null_samples(5); index_t num_trials=5; SGVector<float64_t> type_I_errors(num_trials); SGVector<float64_t> type_II_errors(num_trials); SGVector<index_t> inds(2*m); inds.range_fill(); CFeatures* p_and_q=mmd->get_p_and_q(); /* use a precomputed kernel to be faster */ kernel->init(p_and_q, p_and_q); CCustomKernel* precomputed=new CCustomKernel(kernel); mmd->set_kernel(precomputed); for (index_t i=0; i<num_trials; ++i) { /* this effectively means that p=q - rejecting is type I error */ CMath::permute(inds); precomputed->add_row_subset(inds); precomputed->add_col_subset(inds); type_I_errors[i]=mmd->perform_test()>alpha; precomputed->remove_row_subset(); precomputed->remove_col_subset(); /* on normal data, this gives type II error */ type_II_errors[i]=mmd->perform_test()>alpha; } SG_UNREF(p_and_q); SG_SPRINT("type I error: %f\n", CStatistics::mean(type_I_errors)); SG_SPRINT("type II error: %f\n", CStatistics::mean(type_II_errors)); /* clean up */ SG_UNREF(mmd); SG_UNREF(gen_p); SG_UNREF(gen_q); /* convenience constructor of MMD was used, these were not referenced */ SG_UNREF(feat_p); SG_UNREF(feat_q); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); quadratic_time_mmd(); exit_shogun(); return 0; }
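Adding the same permuted index set as row and column subset of the precomputed CCustomKernel, as done in the loop above, reorders the samples of the merged p-and-q data and thereby simulates the null hypothesis p = q without recomputing the kernel. A minimal sketch of one permutation round, reusing only the calls from the example:

CMath::permute(inds); // random shuffle of the 2m sample indices
precomputed->add_row_subset(inds);
precomputed->add_col_subset(inds);
float64_t p_val_h0 = mmd->perform_test(); // statistic on permuted (null) data
precomputed->remove_row_subset();
precomputed->remove_col_subset();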
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann * Written (W) 2011 Shashwat Lal Das * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society * * This file demonstrates how a regular CDenseFeatures object can * be used as input for the StreamingFeatures framework, effectively * making it suitable for using online learning algorithms. */ #include <shogun/features/streaming/StreamingDenseFeatures.h> #include <shogun/io/streaming/StreamingFileFromDenseFeatures.h> #include <shogun/mathematics/Math.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> #include <shogun/base/init.h> #include <stdlib.h> #include <stdio.h> using namespace shogun; #define NUM 10 #define DIMS 2 #define DIST 0.5 void gen_rand_data(SGMatrix<float64_t> feat, SGVector<float64_t> lab) { for (int32_t i=0; i<NUM; i++) { if (i<NUM/2) { for (int32_t j=0; j<DIMS; j++) feat[i*DIMS+j]=CMath::random(0.0, 1.0)+DIST; if (lab.vector) lab[i]=0; } else { for (int32_t j=0; j<DIMS; j++) feat[i*DIMS+j]=CMath::random(0.0, 1.0)-DIST; if (lab.vector) lab[i]=1; } } feat.display_matrix("feat"); lab.display_vector("lab"); } void test_general() { SGMatrix<float64_t> feat(DIMS, NUM); SGVector<float64_t> lab(NUM); // Generate random data, features and labels gen_rand_data(feat, lab); // Create features CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); SG_REF(features); features->set_feature_matrix(feat); // Create a StreamingDenseFeatures object which uses the above as input; // labels (float64_t*) are optional CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures< float64_t>(features, lab); SG_REF(streaming); // Start parsing of the examples; in this case, it is trivial - returns each vector from the DenseFeatures object streaming->start_parser(); int32_t counter=0; SG_SPRINT("Processing examples...\n\n"); // Run a while loop over all the examples. Note that since // features are "streaming", there is no predefined // number_of_vectors known to the StreamingFeatures object. // Thus, this loop must be used to iterate over all the // features while (streaming->get_next_example()) { counter++; // Get the current vector; no other vector is accessible SGVector<float64_t> vec=streaming->get_vector(); float64_t label=streaming->get_label(); SG_SPRINT("Vector %d: [\t", counter); for (int32_t i=0; i<vec.vlen; i++) { SG_SPRINT("%f\t", vec.vector[i]); } SG_SPRINT("Label=%f\t", label); // Calculate dot product of the current vector (from // the StreamingFeatures object) with itself (the // vector passed as argument) float64_t dot_prod=streaming->dense_dot(vec.vector, vec.vlen); SG_SPRINT("]\nDot product of the vector with itself: %f", dot_prod); SG_SPRINT("\n\n"); // Free the example, since we are done with processing it. streaming->release_example(); } // Now that all examples are used, end the parser. 
streaming->end_parser(); SG_UNREF(streaming); SG_UNREF(features); } void test_get_streamed_features() { /* create streaming features from dense features and then make call and * assert that data is equal */ SGMatrix<float64_t> feat(DIMS, NUM); SGVector<float64_t> lab(NUM); // Generate random data, features and labels gen_rand_data(feat, lab); // Create features CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); SG_REF(features); features->set_feature_matrix(feat); // Create a StreamingDenseFeatures object which uses the above as input; // labels (float64_t*) are optional CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures< float64_t>(features, lab); SG_REF(streaming); streaming->start_parser(); CDenseFeatures<float64_t>* dense= (CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM); streaming->end_parser(); /* assert that matrices are equal */ ASSERT(dense->get_feature_matrix().equals(feat)); SG_UNREF(dense); SG_UNREF(features); SG_UNREF(streaming); } void test_get_streamed_features_too_many() { /* create streaming features from dense features and then make call and * assert that data is equal. requests more data than available */ SGMatrix<float64_t> feat(DIMS, NUM); SGVector<float64_t> lab(NUM); // Generate random data, features and labels gen_rand_data(feat, lab); // Create features CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(); SG_REF(features); features->set_feature_matrix(feat); // Create a StreamingDenseFeatures object which uses the above as input; // labels (float64_t*) are optional CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures< float64_t>(features, lab); SG_REF(streaming); streaming->start_parser(); /* request more features than available */ CDenseFeatures<float64_t>* dense= (CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM+10); streaming->end_parser(); /* assert that matrices are equal */ ASSERT(dense->get_feature_matrix().equals(feat)); SG_UNREF(dense); SG_UNREF(features); SG_UNREF(streaming); } int main() { init_shogun_with_defaults(); sg_io->set_loglevel(MSG_DEBUG); test_general(); test_get_streamed_features(); test_get_streamed_features_too_many(); // exit_shogun(); return 0; }
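All streaming consumers in these examples follow the same parser protocol as test_general() above. A minimal skeleton of the loop, independent of the concrete feature type:

streaming->start_parser(); // spawn the parsing thread
while (streaming->get_next_example()) // blocks until an example is ready
{
	// access the current example only here, e.g. streaming->get_vector(),
	// streaming->get_label()
	streaming->release_example(); // done with this example
}
streaming->end_parser(); // shut down the parser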
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates online learning with OnlineLibLinear, which
 * relies on the streaming features framework.
 */
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/classifier/svm/OnlineLibLinear.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // Create a StreamingAsciiFile from the training data
    const char* train_file_name = "../data/train_densereal.light";
    CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
    SG_REF(train_file);

    // The bool value is true if examples are labelled.
    // 1024 is a good standard value for the number of examples for the parser to hold at a time.
    CStreamingDenseFeatures<float32_t>* train_features =
            new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
    SG_REF(train_features);

    // Create an OnlineLibLinear object from the features. The first parameter is 'C'.
    COnlineLibLinear* svm = new COnlineLibLinear(1, train_features);
    svm->set_bias_enabled(false); // Enable/disable bias
    svm->train();                 // Train
    train_file->close();

    // Now we want to test on other data
    const char* test_file_name = "../data/fm_test_densereal.dat";
    CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
    SG_REF(test_file);

    // Similar, but 'false' since the file contains unlabelled examples.
    // Note that the feature type must match the one used for training (float32_t).
    CStreamingDenseFeatures<float32_t>* test_features =
            new CStreamingDenseFeatures<float32_t>(test_file, false, 1024);
    SG_REF(test_features);

    // Apply on all examples and return a CRegressionLabels*
    CRegressionLabels* test_labels = svm->apply_regression(test_features);

    for (int32_t i=0; i<test_labels->get_num_labels(); i++)
        SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

    SG_UNREF(test_features);
    SG_UNREF(test_labels);
    SG_UNREF(test_file);
    SG_UNREF(train_features);
    SG_UNREF(train_file);
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
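To inspect the trained model, the weight vector can be cloned out of the machine, just as the sparse command-line example further down does. A short sketch reusing svm from the example above:

SGVector<float32_t> w = svm->get_w().clone(); // copy of the learned weights
w.display_vector("w");                        // print them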
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2013 Thoralf Klein
 * Copyright (C) 2013 Zuse-Institute-Berlin
 *
 * This example demonstrates online learning with OnlineLibLinear using
 * sparse streaming features. It also parses command line options, so it
 * can be used as a stand-alone program for binary classification on
 * user-provided inputs.
 */

#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/Time.h>
#include <shogun/classifier/svm/OnlineLibLinear.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
#include <shogun/labels/BinaryLabels.h>

#include <stdlib.h> // for mkstemp and atof
#include <unistd.h> // for close and unlink

using namespace shogun;

int main(int argc, char* argv[])
{
    init_shogun_with_defaults();

    float64_t C = 1.0;
    char *train_file_name = (char*)"../data/train_sparsereal.light";
    char *test_file_name = (char*)"../data/test_sparsereal.light";
    char filename_tmp[] = "test_sparsereal.light.labels.XXXXXX";
    int fd = mkstemp(filename_tmp);
    ASSERT(fd != -1);
    int retval = close(fd);
    ASSERT(retval != -1);
    char *test_labels_file_name = filename_tmp;

    if (argc > 4)
    {
        int32_t idx = 1;
        C = atof(argv[idx++]);
        train_file_name = argv[idx++];
        test_file_name = argv[idx++];
        test_labels_file_name = argv[idx++];
        ASSERT(idx <= argc);
    }

    fprintf(stderr, "*** training file %s with C %g\n", train_file_name, C);

    // Create an OnlineLibLinear object. The first parameter is 'C'.
    COnlineLibLinear *svm = new COnlineLibLinear(C);
    svm->set_bias_enabled(true);

    {
        CTime train_time;
        train_time.start();

        // Create a StreamingAsciiFile from the training data
        CStreamingAsciiFile *train_file = new CStreamingAsciiFile(train_file_name);
        SG_REF(train_file);

        // The bool value is true if examples are labelled.
        // 1024 is a good standard value for the number of examples for the parser to hold at a time.
        CStreamingSparseFeatures<float32_t> *train_features =
                new CStreamingSparseFeatures<float32_t>(train_file, true, 1024);
        SG_REF(train_features);

        svm->set_features(train_features);
        svm->train();
        train_file->close();

        SG_UNREF(train_file);
        SG_UNREF(train_features);

        train_time.stop();

        SGVector<float32_t> w_now = svm->get_w().clone();
        float32_t w_now_norm = SGVector<float32_t>::twonorm(w_now.vector, w_now.vlen);

        uint64_t train_time_int = train_time.cur_time_diff();
        fprintf(stderr,
                "*** total training time: %llum%llus (or %.1f sec), #dim = %d, ||w|| = %f\n",
                train_time_int / 60, train_time_int % 60,
                train_time.cur_time_diff(), w_now.vlen, w_now_norm);
    }

    {
        CTime test_time;
        test_time.start();

        // Now we want to test on holdout data
        CStreamingAsciiFile *test_file = new CStreamingAsciiFile(test_file_name);
        SG_REF(test_file);

        // Set second parameter to 'false' if the file contains unlabelled examples
        CStreamingSparseFeatures<float32_t> *test_features =
                new CStreamingSparseFeatures<float32_t>(test_file, true, 1024);
        SG_REF(test_features);

        // Apply on all examples and return a CBinaryLabels*
        CBinaryLabels *test_binary_labels = svm->apply_binary(test_features);
        SG_REF(test_binary_labels);

        test_time.stop();

        uint64_t test_time_int = test_time.cur_time_diff();
        fprintf(stderr, "*** testing took %llum%llus (or %.1f sec)\n",
                test_time_int / 60, test_time_int % 60,
                test_time.cur_time_diff());

        SG_UNREF(test_features);
        SG_UNREF(test_file);

        // Write the predicted labels to a file for evaluation
        fprintf(stderr, "*** writing labels to file %s\n", test_labels_file_name);
        FILE* fh = fopen(test_labels_file_name, "wb");
        ASSERT(fh);
        for (int32_t j = 0; j < test_binary_labels->get_num_labels(); j++)
            fprintf(fh, "%d\n", test_binary_labels->get_int_label(j));
        fclose(fh);
        SG_UNREF(test_binary_labels);

        unlink(test_labels_file_name);
    }

    SG_UNREF(svm);
    exit_shogun();
    return 0;
}
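Once compiled as described at the top of this page, this example doubles as a small stand-alone classifier. Run without arguments it uses the bundled data files; alternatively, pass C, the training file, the test file, and an output file for the predicted labels (the file names here are placeholders):

./name_of_example 1.0 my_train.light my_test.light predicted.labels

Note that all four arguments must be given for them to take effect, since the example only parses them when argc > 4.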
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the online variant of SGD which
 * relies on the streaming features framework.
 */
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // Create a StreamingAsciiFile from the training data
    const char* train_file_name = "../data/train_sparsereal.light";
    CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
    SG_REF(train_file);

    // Create a StreamingSparseFeatures from the StreamingAsciiFile.
    // The bool value is true if examples are labelled.
    // 1024 is a good standard value for the number of examples for the parser to hold at a time.
    CStreamingSparseFeatures<float64_t>* train_features =
            new CStreamingSparseFeatures<float64_t>(train_file, true, 1024);
    SG_REF(train_features);

    // Create an OnlineSVMSGD object from the features. The first parameter is 'C'.
    COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
    sgd->set_bias_enabled(false); // Enable/disable bias
    sgd->set_lambda(0.1);         // Choose lambda
    sgd->train();                 // Train
    train_file->close();

    // Now we want to test on other data
    const char* test_file_name = "../data/fm_test_sparsereal.dat";
    CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
    SG_REF(test_file);

    // Similar, but 'false' since the file contains unlabelled examples
    CStreamingSparseFeatures<float64_t>* test_features =
            new CStreamingSparseFeatures<float64_t>(test_file, false, 1024);
    SG_REF(test_features);

    // Apply on all examples and return a CBinaryLabels*
    CBinaryLabels* test_labels = sgd->apply_binary(test_features);

    for (int32_t i=0; i<test_labels->get_num_labels(); i++)
        SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

    SG_UNREF(test_labels);
    SG_UNREF(test_features);
    SG_UNREF(test_file);
    SG_UNREF(train_features);
    SG_UNREF(train_file);
    SG_UNREF(sgd);

    exit_shogun();
    return 0;
}
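With a labelled test set, accuracy can be computed by comparing predictions against ground truth. A minimal sketch, assuming a CBinaryLabels* gnd holding the true labels has been obtained separately (gnd is a hypothetical variable, not part of the example above):

int32_t n_correct = 0; // count matching predictions
for (int32_t i=0; i<test_labels->get_num_labels(); i++)
{
    if (test_labels->get_int_label(i) == gnd->get_int_label(i)) // predicted vs. true sign
        n_correct++;
}
SG_SPRINT("Accuracy = %.2f%%\n", 100.0*n_correct/test_labels->get_num_labels());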
// This example demonstrates the use of StreamingStringFeatures.

#include <shogun/lib/common.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGString.h>
#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingStringFeatures.h>

using namespace shogun;

void display_vector(const SGString<char> &vec)
{
    printf("\nNew Vector\n------------------\n");
    printf("Length=%d.\n", vec.slen);
    for (int32_t i=0; i<vec.slen; i++)
        printf("%c", vec.string[i]);
    printf("\n");
}

int main(int argc, char **argv)
{
    init_shogun_with_defaults();

    // Create a StreamingAsciiFile from our input file
    CStreamingAsciiFile* file = new CStreamingAsciiFile("../data/fm_train_dna.dat");
    SG_REF(file);

    // This file contains unlabelled data, so the second argument is `false'.
    CStreamingStringFeatures<char>* feat =
            new CStreamingStringFeatures<char>(file, false, 1024);
    SG_REF(feat);

    // The alphabet to use is DNA
    feat->use_alphabet(DNA);

    // Loop over all examples and simply display each one
    feat->start_parser();
    while (feat->get_next_example())
    {
        SGString<char> vec = feat->get_vector();
        display_vector(vec);
        feat->release_example();
    }
    feat->end_parser();

    // Get the alphabet and display its histogram
    CAlphabet* alpha = feat->get_alphabet();
    printf("\nThe histogram is:\n");
    alpha->print_histogram();

    SG_UNREF(alpha);
    SG_UNREF(feat);
    SG_UNREF(file);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the Vowpal Wabbit learning algorithm.
 */
#include <shogun/lib/common.h>
#include <shogun/io/streaming/StreamingVwFile.h>
#include <shogun/features/streaming/StreamingVwFeatures.h>
#include <shogun/classifier/vw/VowpalWabbit.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    const char* train_file_name = "../data/train_sparsereal.light";
    CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
    train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
    SG_REF(train_file);

    CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
    SG_REF(train_features);

    CVowpalWabbit* vw = new CVowpalWabbit(train_features);
    vw->set_regressor_out("./vw_regressor_text.dat"); // Save the regressor to this file
    vw->set_adaptive(false);                          // Disable adaptive learning
    vw->train_machine();

    SG_SPRINT("Weights have been output in text form to vw_regressor_text.dat.\n");

    train_file->close();

    // Test on the training file itself
    CStreamingVwFile* test_file = new CStreamingVwFile(train_file_name);
    test_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
    SG_REF(test_file);

    CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, true, 1024);
    SG_REF(test_features);

    test_features->start_parser();
    while (test_features->get_next_example())
    {
        VwExample *example = test_features->get_example();
        float64_t pred = vw->predict_and_finalize(example);
        printf("%.2lf\n", pred);
        test_features->release_example();
    }
    test_features->end_parser();
    test_file->close();

    SG_UNREF(train_features);
    SG_UNREF(train_file);
    SG_UNREF(vw);
    SG_UNREF(test_features);
    SG_UNREF(test_file);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of online SGD with CStreamingVwFeatures
 * as the features object.
 */
#include <shogun/lib/common.h>
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingVwFile.h>
#include <shogun/features/streaming/StreamingVwFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    const char* train_file_name = "../data/train_sparsereal.light";
    CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
    train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
    SG_REF(train_file);

    CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
    SG_REF(train_features);

    COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
    sgd->set_bias_enabled(false);
    sgd->set_lambda(0.1);
    sgd->train();

    // Now we want to test on other data
    const char* test_file_name = "../data/fm_test_sparsereal.dat";
    CStreamingVwFile* test_file = new CStreamingVwFile(test_file_name);
    test_file->set_parser_type(T_SVMLIGHT);
    SG_REF(test_file);

    // Similar, but 'false' since the file contains unlabelled examples
    CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, false, 1024);
    SG_REF(test_features);

    // Apply on all examples and return a CBinaryLabels*
    CBinaryLabels* test_labels = sgd->apply_binary(test_features);

    for (int32_t i=0; i<test_labels->get_num_labels(); i++)
        SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

    SG_UNREF(test_labels);
    SG_UNREF(test_features);
    SG_UNREF(test_file);
    SG_UNREF(sgd);
    SG_UNREF(train_features);
    SG_UNREF(train_file);

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StateModelTypes.h>
#include <shogun/features/MatrixFeatures.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // Four examples, each a sequence of length three with one feature dimension
    float64_t features_dat[] = {0,1,1, 2,1,2, 0,1,0, 0,2,2};
    SGMatrix<float64_t> features_mat(features_dat,1,12,false);
    CMatrixFeatures<float64_t>* features = new CMatrixFeatures<float64_t>(features_mat,3,4);

    // The corresponding state sequences
    int32_t labels_dat[] = {0,0,0, 1,1,1, 0,0,0, 1,1,1};
    SGVector<int32_t> labels_vec(labels_dat,12,false);
    CSequenceLabels* labels = new CSequenceLabels(labels_vec,3,4,2);
    labels->io->set_loglevel(MSG_DEBUG);

    CHMSVMModel* model = new CHMSVMModel(features, labels, SMT_TWO_STATE, 3);
    CDualLibQPBMSOSVM* sosvm = new CDualLibQPBMSOSVM(model, labels, 5000.0);
    sosvm->train();

    SG_UNREF(sosvm);
    exit_shogun();
    return 0;
}
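After training, the learned model can be applied back to the sequences, following the same pattern as the Mosek example below. A sketch, assuming <shogun/labels/LabelsFactory.h> is also included and the lines are placed before the final SG_UNREF(sosvm):

// predict state sequences for the training features
CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply());
ASSERT(out->get_num_labels() == labels->get_num_labels());
SG_UNREF(out);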
#include <shogun/base/init.h>
#include <shogun/labels/StructuredLabels.h>
#include <shogun/labels/LabelsFactory.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/TwoStateModel.h>

using namespace shogun;

int main(int argc, char ** argv)
{
    init_shogun_with_defaults();

#ifdef USE_MOSEK
    int32_t num_examples = 10;
    int32_t example_length = 250;
    int32_t num_features = 10;
    int32_t num_noise_features = 2;
    CHMSVMModel* model = CTwoStateModel::simulate_data(num_examples,
            example_length, num_features, num_noise_features);

    CStructuredLabels* labels = model->get_labels();
    CFeatures* features = model->get_features();

    CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, labels);
    SG_REF(sosvm);

    sosvm->train();
    // sosvm->get_w().display_vector("w");

    CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply());
    ASSERT( out->get_num_labels() == labels->get_num_labels() );

    for ( int32_t i = 0 ; i < out->get_num_labels() ; ++i )
    {
        CSequence* pred_seq = CSequence::obtain_from_generic( out->get_label(i) );
        CSequence* true_seq = CSequence::obtain_from_generic( labels->get_label(i) );
        SG_UNREF(pred_seq);
        SG_UNREF(true_seq);
    }

    SG_UNREF(out);
    SG_UNREF(features); // because model->get_features() increased the count
    SG_UNREF(labels);   // because model->get_labels() increased the count
    SG_UNREF(sosvm);
#endif /* USE_MOSEK */

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>
#include <shogun/structure/TwoStateModel.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    CTwoStateModel* tsm = new CTwoStateModel();
    CHMSVMModel* model = tsm->simulate_data(100,250,3,1);
    CStructuredLabels* labels = model->get_labels();

    CDualLibQPBMSOSVM* sosvm = new CDualLibQPBMSOSVM(model, labels, 5000.0);
    sosvm->train();

    SG_UNREF(sosvm);
    SG_UNREF(labels);
    SG_UNREF(tsm);

    exit_shogun();
    return 0;
}
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLeastSquaresRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
    init_shogun(&print_message);

    // create some data: a 2x4 feature matrix, i.e. 4 examples with 2 dimensions
    SGMatrix<float64_t> matrix(2,4);
    for (int32_t i=0; i<2*4; i++)
        matrix.matrix[i]=i;

    CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);

    // create four labels
    CRegressionLabels* labels=new CRegressionLabels(4);
    labels->set_label(0, -1.4);
    labels->set_label(1, +1.5);
    labels->set_label(2, -1.2);
    labels->set_label(3, +1.1);

    // the first two examples belong to the first task, the last two to the second
    CTask* first_task = new CTask(0,2);
    CTask* second_task = new CTask(2,4);

    CTaskGroup* task_group = new CTaskGroup();
    task_group->append_task(first_task);
    task_group->append_task(second_task);

    CMultitaskLeastSquaresRegression* regressor =
            new CMultitaskLeastSquaresRegression(0.5,features,labels,task_group);
    regressor->train();

    regressor->set_current_task(0);
    regressor->get_w().display_vector();

    SG_UNREF(regressor);
    exit_shogun();
    return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
#include <shogun/lib/config.h>
#ifdef USE_GPL_SHOGUN
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLogisticRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    // create some data: a 2x4 feature matrix, i.e. 4 examples with 2 dimensions
    SGMatrix<float64_t> matrix(2,4);
    for (int32_t i=0; i<2*4; i++)
        matrix.matrix[i]=i;

    CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);

    // create four labels
    CBinaryLabels* labels=new CBinaryLabels(4);
    labels->set_label(0, -1);
    labels->set_label(1, +1);
    labels->set_label(2, -1);
    labels->set_label(3, +1);

    CTask* first_task = new CTask(0,2);
    CTask* second_task = new CTask(2,4);

    CTaskGroup* task_group = new CTaskGroup();
    task_group->append_task(first_task);
    task_group->append_task(second_task);

    CMultitaskLogisticRegression* regressor =
            new CMultitaskLogisticRegression(0.5,features,labels,task_group);
    regressor->train();

    regressor->set_current_task(0);
    regressor->get_w().display_vector();

    // the same two tasks can also be arranged in a tree
    CTask* root_task = new CTask(0,4);
    root_task->add_subtask(first_task);
    root_task->add_subtask(second_task);

    CTaskTree* task_tree = new CTaskTree(root_task);

    regressor->set_task_relation(task_tree);
    regressor->train();

    regressor->set_current_task(0);
    regressor->get_w().display_vector();

    SG_UNREF(regressor);
    exit_shogun();
    return 0;
}
#else //USE_GPL_SHOGUN
int main(int argc, char** argv)
{
    return 0;
}
#endif //USE_GPL_SHOGUN
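Each task has its own weight vector, so the second task's solution can be inspected by switching the current task; a short sketch continuing the example above:

regressor->set_current_task(1);      // switch to the second task
regressor->get_w().display_vector(); // print its weights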
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2014 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 * list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 * Code adapted from
 * https://github.com/emtiyaz/VariationalApproxExample
 * and the reference paper is
 * Marlin, Benjamin M., Mohammad Emtiyaz Khan, and Kevin P. Murphy.
 * "Piecewise Bounds for Estimating Bernoulli-Logistic Latent Gaussian Models." ICML. 2011.
 *
 * This code is specifically adapted from example.m and simpleVariational.m
 */

// Eigen3 is required for working with this example
#include <shogun/lib/config.h>
#include <shogun/base/init.h>
#include <shogun/machine/gp/LogitVGPiecewiseBoundLikelihood.h>
#include <shogun/distributions/classical/GaussianDistribution.h>
#include <shogun/optimization/lbfgs/lbfgs.h>
#include <shogun/mathematics/Math.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/io/CSVFile.h>
#include <cstdio>

using namespace shogun;

// Initialize the piecewise variational bound
SGMatrix<float64_t> init_piecewise_bound(const char * fname)
{
    SGMatrix<float64_t> bound;
    CCSVFile* bound_file = new CCSVFile(fname);
    bound_file->set_delimiter('\t');
    bound.load(bound_file);
    SG_UNREF(bound_file);
    return bound;
}

// The following precomputed values are used to verify correctness.
// This code will eventually be removed.
SGVector<float64_t> load_m_from_matlab(const char * fname)
{
    SGVector<float64_t> m_from_matlab;
    CCSVFile* m_file = new CCSVFile(fname);
    m_file->set_delimiter('\t');
    m_from_matlab.load(m_file);
    SG_UNREF(m_file);
    return m_from_matlab;
}

// The following precomputed value is used to verify correctness.
// This code will eventually be removed.
float64_t load_loglik_from_matlab(const char * fname)
{
    SGVector<float64_t> f_from_matlab;
    CCSVFile* f_file = new CCSVFile(fname);
    f_file->set_delimiter('\t');
    f_from_matlab.load(f_file);
    SG_UNREF(f_file);
    REQUIRE(f_from_matlab.vlen == 1, "logLik is a scalar");
    return f_from_matlab[0];
}

// Randomly generate the input features (X)
SGMatrix<float64_t> create_feature(const char *fname, index_t num_sample, index_t num_dim)
{
    REQUIRE(num_sample % 2 == 0, "For this example we assume num_sample is even");

    /*
    //X = [5*rand(N/2,D); -5*rand(N/2,D)];
    //The following code is used to generate synthetic data
    SGMatrix<float64_t> X(num_sample,num_dim);
    for(index_t i = 0; i < num_sample; i++)
    {
        for(index_t j = 0; j < num_dim; j++)
        {
            if (i < num_sample/2)
                X(i, j) = CMath::random(0,1)*5.0;
            else
                X(i, j) = CMath::random(0,1)*-5.0;
        }
    }
    */

    // The following precomputed values are used to verify correctness.
    // This code will eventually be removed.
    SGMatrix<float64_t> X;
    CCSVFile* X_file = new CCSVFile(fname);
    X_file->set_delimiter('\t');
    X.load(X_file);
    SG_UNREF(X_file);
    return X;
}

// Randomly generate the observed labels (y), following a Gaussian distribution (synthetic data)
SGVector<float64_t> create_label(const char * fname, SGVector<float64_t> mu, SGMatrix<float64_t> sigma)
{
    REQUIRE(sigma.num_rows == sigma.num_cols, "Sigma should be a covariance (square) matrix");
    REQUIRE(sigma.num_rows == mu.vlen, "Sigma and mu should have the same dimensionality");

    /*
    //The following code is used to generate synthetic data
    index_t num_sample = sigma.num_rows;
    SGVector<float64_t> y(num_sample);
    Eigen::Map<Eigen::MatrixXd> eigen_sigma(sigma.matrix, sigma.num_rows, sigma.num_cols);
    //y = mvnrnd(mu, Sigma, 1);
    CProbabilityDistribution * dist = new CGaussianDistribution(mu, sigma);
    y = dist->sample();
    //y = (y(:)>0);
    //Note that Shogun uses -1 and 1 as labels
    for( index_t i = 0; i < y.vlen; ++i)
    {
        if (y[i] > 0)
            y[i] = 1;
        else
            y[i] = -1;
    }
    SG_UNREF(dist);
    */

    // The following precomputed values are used to verify correctness.
    // This code will eventually be removed.
    // Note that Shogun uses -1 and 1 as labels
    SGVector<float64_t> y;
    CCSVFile* y_file = new CCSVFile(fname);
    y_file->set_delimiter('\t');
    y.load(y_file);
    SG_UNREF(y_file);
    for(index_t i = 0; i < y.vlen; i++)
    {
        if (y[i] > 0)
            y[i] = 1;
        else
            y[i] = -1;
    }
    REQUIRE(y.vlen == mu.vlen,
        "The labels loaded from the file should have the same dimensionality as mu");
    return y;
}

// The following struct is used to pass information to the built-in L-BFGS component
struct Shared
{
    CLogitVGPiecewiseBoundLikelihood *lik;
    SGVector<float64_t> y;
    SGVector<float64_t> mu;
    lbfgs_parameter_t lbfgs_param;
    SGVector<float64_t> m0;
    SGVector<float64_t> v;
    SGMatrix<float64_t> sigma;
    SGMatrix<float64_t> data;
    SGMatrix<float64_t> bound;
    Eigen::LDLT<Eigen::MatrixXd> ldlt;
};

// Initialize the parameters used for L-BFGS
lbfgs_parameter_t init_lbfgs_parameters()
{
    lbfgs_parameter_t tmp;
    tmp.m = 100;
    tmp.max_linesearch = 1000;
    tmp.linesearch = LBFGS_LINESEARCH_DEFAULT;
    tmp.max_iterations = 1000;
    tmp.delta = 1e-15;
    tmp.past = 0;
    tmp.epsilon = 1e-15;
    tmp.min_step = 1e-20;
    tmp.max_step = 1e+20;
    tmp.ftol = 1e-4;
    tmp.wolfe = 0.9;
    tmp.gtol = 0.9;
    tmp.xtol = 1e-16;
    tmp.orthantwise_c = 0;
    tmp.orthantwise_start = 0;
    tmp.orthantwise_end = 1;
    return tmp;
}

// This function mirrors the Matlab code in simpleVariational.m
float64_t evaluate(void *obj, const float64_t *variable, float64_t *gradient,
    const int dim, const float64_t step)
{
    Shared * obj_ptr = static_cast<Shared *>(obj);

    CBinaryLabels lab(obj_ptr->y);
    obj_ptr->lik->set_variational_distribution(obj_ptr->m0, obj_ptr->v, &lab);

    Eigen::Map<Eigen::VectorXd> eigen_mu(obj_ptr->mu.vector, obj_ptr->mu.vlen);
    Eigen::Map<Eigen::VectorXd> eigen_m(obj_ptr->m0.vector, obj_ptr->m0.vlen);

    //[fi, gmi, gvi] = ElogLik('bernLogit', y, m, v, bound); fi is obtained here
    SGVector<float64_t> fi = obj_ptr->lik->get_variational_expection();

    TParameter* mu_param = obj_ptr->lik->m_parameters->get_parameter("mu");

    //[fi, gmi, gvi] = ElogLik('bernLogit', y, m, v, bound); gmi is obtained here
    SGVector<float64_t> gmi = obj_ptr->lik->get_variational_first_derivative(mu_param);

    SGVector<float64_t> g(dim);
    Eigen::Map<Eigen::VectorXd> eigen_g(g.vector, g.vlen);

    //e = m-mu;
    //g = Omega*e;
    eigen_g = obj_ptr->ldlt.solve(eigen_m - eigen_mu);

    //f = -e'*g/2 + sum(fi);
    Eigen::VectorXd ff = -0.5*((eigen_m-eigen_mu).transpose()*eigen_g);
    ASSERT(ff.size() == 1);
    float64_t f = ff(0) + SGVector<float64_t>::sum(fi);

    // Compute the gradient based on the current variable
    Eigen::Map<Eigen::VectorXd> eigen_gradient(gradient, dim);
    Eigen::Map<Eigen::VectorXd> eigen_gmi(gmi.vector, gmi.vlen);

    //g = -g + gmi;
    //g = -g;
    eigen_gradient = eigen_g - eigen_gmi;

    //f = -f;
    return -f;
}

void run(const char * x_file, const char * y_file, const char * bound_file,
    const char * m_file, const char * loglik_file)
{
    //N = 20; % number of data examples
    index_t num_sample = 20;
    //D = 5; % feature dimensionality
    index_t num_dim = 5;

    Shared obj;

    //X = [5*rand(N/2,D); -5*rand(N/2,D)];
    obj.data = create_feature(x_file, num_sample, num_dim);

    // since we read from file, take the dimensions from the data
    num_sample = obj.data.num_rows;
    num_dim = obj.data.num_cols;
    SG_SPRINT("num_samples:%d num_dimensions:%d\n", num_sample, num_dim);

    //Sigma = X*X' + eye(N); % linear kernel
    obj.sigma = SGMatrix<float64_t>(num_sample, num_sample);
    Eigen::Map<Eigen::MatrixXd> eigen_data(obj.data.matrix, obj.data.num_rows, obj.data.num_cols);
    Eigen::Map<Eigen::MatrixXd> eigen_sigma(obj.sigma.matrix, obj.sigma.num_rows, obj.sigma.num_cols);
    eigen_sigma = eigen_data*(eigen_data.transpose()) +
        Eigen::MatrixXd::Identity(num_sample, num_sample);

    //mu = zeros(N,1); % zero mean
    obj.mu = SGVector<float64_t>(num_sample);
    Eigen::Map<Eigen::VectorXd> eigen_mu(obj.mu.vector, obj.mu.vlen);
    eigen_mu.fill(0);

    //y = mvnrnd(mu, Sigma, 1);
    //y = (y(:)>0);
    obj.y = create_label(y_file, obj.mu, obj.sigma);

    //% optimizer options
    //optMinFunc = struct('Display', 1,...
    //'Method', 'lbfgs',...
    //'DerivativeCheck', 'off',...
    //'LS', 2,...
    //'MaxIter', 1000,...
    //'MaxFunEvals', 1000,...
    //'TolFun', 1e-4,...
    //'TolX', 1e-4);
    obj.lbfgs_param = init_lbfgs_parameters();

    //load('llp.mat');
    obj.bound = init_piecewise_bound(bound_file);

    obj.lik = new CLogitVGPiecewiseBoundLikelihood();
    obj.lik->set_variational_bound(obj.bound);

    //m0 = mu; % initial value, all zero
    obj.m0 = SGVector<float64_t>(num_sample);
    obj.v = SGVector<float64_t>(num_sample);
    Eigen::Map<Eigen::VectorXd> eigen_m0(obj.m0.vector, obj.m0.vlen);
    eigen_m0 = eigen_mu;

    //v = ones(N,1); % fix v to 1
    Eigen::Map<Eigen::VectorXd> eigen_v(obj.v.vector, num_sample);
    eigen_v.fill(1);

    //Omega = inv(Sigma);
    obj.ldlt.compute(eigen_sigma);
    // sigma is positive definite
    ASSERT(obj.ldlt.isPositive());

    float64_t logLik = 0.0;
    //[m, logLik] = minFunc(@simpleVariational, m0, optMinFunc, y, X, mu, Omega, v, bound);
    int ret = lbfgs(obj.m0.vlen, obj.m0.vector, &logLik, evaluate, NULL, &obj, &obj.lbfgs_param);

    SGVector<float64_t> m_from_matlab = load_m_from_matlab(m_file);
    float64_t logLik_from_matlab = load_loglik_from_matlab(loglik_file);
    ASSERT(m_from_matlab.vlen == num_sample);

    SG_SPRINT("lbfgs status =%d\n",ret);
    SG_SPRINT("logLik from Shogun =%.10f from Matlab =%.10f\n", logLik, logLik_from_matlab);
    SG_SPRINT("opt m =\n");
    for(index_t i = 0; i < obj.m0.vlen; ++i)
    {
        float64_t relative_diff;
        if (m_from_matlab[i] != 0.0)
            relative_diff = CMath::abs(obj.m0[i]/m_from_matlab[i] - 1);
        else
            relative_diff = CMath::abs(obj.m0[i]);
        SG_SPRINT("m[%d] from Shogun =%.10f from Matlab = %.10f relative_diff = %.10f\n",
            i+1, obj.m0[i], m_from_matlab[i], relative_diff);
    }
    SG_UNREF(obj.lik);
}

void test_datasets()
{
    const index_t buff_size = 1024;
    const char * data_path = "../data/variational";

    char bound_path_buffer[buff_size];
    char x_path_buffer[buff_size];
    char y_path_buffer[buff_size];
    char m_path_buffer[buff_size];
    char loglik_path_buffer[buff_size];

    snprintf(bound_path_buffer, buff_size, "%s/bounds", data_path);

    FILE* pfile = fopen(bound_path_buffer, "r");
    if (pfile == NULL)
    {
        SG_SPRINT("Unable to open file: %s\n", bound_path_buffer);
        return;
    }
    fclose(pfile);

    for (index_t i = 4; i <= 6; i++)
    {
        snprintf(x_path_buffer, buff_size, "%s/X_dataset%d", data_path, i);
        snprintf(y_path_buffer, buff_size, "%s/y_dataset%d", data_path, i);
        snprintf(m_path_buffer, buff_size, "%s/m_dataset%d", data_path, i);
        snprintf(loglik_path_buffer, buff_size, "%s/logLik_dataset%d", data_path, i);

        SG_SPRINT("\nDataset %d\n", i);
        run(x_path_buffer, y_path_buffer, bound_path_buffer, m_path_buffer, loglik_path_buffer);
    }
}

int main(int argc, char** argv)
{
    init_shogun_with_defaults();
    test_datasets();
    exit_shogun();
    return 0;
}
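For reference, the quantity that lbfgs() minimizes in run() can be read off the comments in evaluate(): writing e = m - mu and Omega = inv(Sigma),

    F(m) = (1/2) e' Omega e - sum_i f_i(m_i, v_i),    grad F(m) = Omega e - gm,

where the f_i are the piecewise-bound approximations of the expected Bernoulli-logit log-likelihood terms and gm is their gradient with respect to m. Minimizing F therefore maximizes the variational lower bound, and the minimum is what gets reported as logLik; these sign conventions are inferred from the code and the Matlab reference (simpleVariational.m), not stated explicitly in the example.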